#! /bin/csh -f
#
echo "Scaler Elves v 1.2.5    Because you have better things to do.(TM)   James Holton 11-1-06"
echo ""
#
#   Intelligently scale data from several wavelengths/runs against each other,
#   and then merge each wavelength separately.
#
#   Accepts input from mosflm (*.mtz) or denzo (*.x) and
#   Produces output files suitable for SHELX, SHARP, SOLVE, and X-PLOR.
#
# Try it!  Let me know what you think.
#
#   jamesh@ucxray.berkeley.edu
#
# Print the copyright notice below when both of these hold:
#   - find reports nothing for "-mtime +30" on this script, i.e. the file
#     was not last modified more than 30 days ago (or find itself failed)
#   - the script was started without any command-line arguments
set temp = `find $0 -mtime +30 -print`
if(("$temp" == "")&&($#argv == 0)) cat << EOF-lawyers
Copyright 1999. The Regents of the University of California (Regents). All Rights Reserved. 

     Permission to use, copy, modify, and distribute this software and its 
     documentation for educational, research, and not-for-profit purposes, 
     without fee and without a signed licensing agreement, is hereby granted, 
     provided that the above copyright notice, this paragraph and the following 
     two paragraphs appear in all copies, modifications, and distributions. 
     Contact The Office of Technology Licensing, UC Berkeley, 2150 Shattuck 
     Avenue, Suite 510, Berkeley, CA 94720-1620, (510) 643-7201, for commercial 
     licensing opportunities. Created by James Holton, Department of Molecular 
     and Cell Biology, University of California, Berkeley. 
  
     IN NO EVENT SHALL REGENTS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, 
     SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, 
     ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 
     REGENTS HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  
     REGENTS SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED 
     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
     PURPOSE. THE SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED 
     HEREUNDER IS PROVIDED "AS IS". REGENTS HAS NO OBLIGATION TO PROVIDE
     MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 

EOF-lawyers
#
#   upon its distribution, this file was:
#	lines: 15436
#	bytes: 434403
#   altered versions are not supported by JMH
#   
#   
############################################################################

###############################################################################
#
#   Evaluate unix system
#
###############################################################################
# make sure nawk works
#
# Three increasingly expensive searches for an awk implementation that
# passes a functional probe.  The probe feeds "1.54" to the candidate and
# expects printf("%3d", 3147.7 * ($1)^(-3.014)) to print 856, so it
# exercises both the ^ operator and printf.  The accepted command is kept
# in $program and finally installed as the "nawk" alias and $nawk variable.
set program = "nawk"
foreach name ( nawk awk gawk )
    # stop as soon as $program names an executable file
    test -x "$program"
    if(! $status) break
    
    # ask which(1) where this name lives; "... not in ..." lines are dropped
    set possibilities = `which $name |& grep -v ' not in ' | tail -1`
    foreach file ( $possibilities )
	test -x "$file"
	if(! $status) then
	    # test for desired functionality (change this?)
	    set temp = `echo "1.54" | $file '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
	    if("$temp" == 856) then
		set program = "$file"
		break
	    endif
	endif
    end
    unset possibilities
end
# second pass: anything whose name contains "awk" in the usual bin directories
test -x "$program"
if($status) then
    set program = "awk"
    foreach place ( /bin /usr/bin /usr/local/bin  )
	test -x "$program"
	if(($status)&&(-e $place)) then
	    # keep looking
	    # NOTE(review): "sort -4n" does not look like a key specification
	    # that current sort implementations accept; because of |& any
	    # error text would become the candidate list -- confirm intent.
	    # NOTE(review): the names come from ls without the ${place}/
	    # prefix, so each candidate is run exactly as listed.
	    set files = `ls -1L ${place} |& grep "$program" |& sort -4n |& head -20 `
	    foreach file ( $files )
		# test for desired functionality
		set temp = `echo "1.54" | $file '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
		if("$temp" == 856) then
		    set program = "$file"
		    break
		endif
	    end
	endif
    end
endif

# aggressively search for nawk in likely places
test -x "$program"
if($status) then
    echo -n "Looking for $program "
    foreach place ( /bin /usr/bin /usr/local/bin /usr / )
	test -x "$program"
	if(($status)&&(-e $place)) then
	    # searching from the root directory is the last resort
	    if("$place" == "/") echo -n "uhh"
	    
	    # use find to get candidate files
	    # (symlinks, or regular files larger than 10000 bytes, with the
	    #  "-perm -1" bit set, whose names end in $program; at most 20)
	    set files = `find $place -name '*'$program \( -type l -o \( -type f -size +10000c \) \) -perm -1 -print |& egrep -v "^find:" |& head -20`
	    foreach file ( $files )
		# test for desired functionality
		set temp = `echo "1.54" | $file '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
		if("$temp" == 856) then
		    set program = "$file"
		    break
		endif
	    end
	endif
	
	# entertainment
	echo -n "."
    end
endif

# check that we found the right program
set temp = `echo "1.54" | $program '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
if("$temp" == 856) then
    # set up this awk program as nawk
    set nawk = "$program"
    alias nawk $nawk
else
    # nothing passed the probe: warn, then fall back to plain awk anyway
    echo "Dagnabbit!  We can't find a suitable awk program.  What kind of unix is this? "
    echo "Elves may not be able to work."
    set nawk = /bin/awk
    alias nawk awk
endif

# single-byte degree and Angstrom symbols (octal 260 and 305), produced by
# the awk selected above; they may not display correctly everywhere
set DEG = `echo "" | nawk 'BEGIN{printf "\260"}'`
set ANG = `echo "" | nawk 'BEGIN{printf "\305"}'`
#set ANG = "A"
#set DEG = "deg"

# OSF1 csh: when "echo -n" prints its flag the probe yields two words;
# in that case switch to /usr/bin/echo if it is available
set temp = `echo -n "test"`
if((-e /usr/bin/echo)&&($#temp == 2)) alias echo /usr/bin/echo

# Make sure the CCP4 environment is available.
#
# When $CCP4 is not already defined, look for a "ccp4.setup" script under a
# list of likely installation roots and source the first one found.  If CCP4
# is still undefined afterwards, print installation hints and exit with
# status 9.  Finally make sure both scratch directories (CCP4_SCR and
# BINSORT_SCR) are defined and writable, falling back to "." otherwise.
if(! $?CCP4) then
    echo -n "Attempting to set up CCP4 ... "

    set ccp4setup = ""
    foreach place ( /programs/xtal/ /usr/xtal/ /programs/xtal /programs/ /usr/xtal /usr/local /usr/ )
	if((! -e "$ccp4setup")&&(-e "$place")) then
	    # look for setup scripts here (last match reported by find wins)
	    set ccp4setup = `find ${place} -name ccp4.setup |& nawk '/ccp4.setup$/{print $NF}' | tail -1`
	endif
	if((-e "$ccp4setup")&&(! $?CCP4)) then
	    source $ccp4setup
	    setenv CCP4_SCR    .
	    setenv BINSORT_SCR .
	    echo "using $ccp4setup"
	    # once a setup script has been sourced no other place is searched,
	    # so stop here instead of sourcing the same script again (and
	    # repeating the message) for every remaining place
	    break
	endif
    end
endif
if(! $?CCP4) then
    echo "failed."
    echo "Please ask your sysadmin how to set up CCP4, "
    echo "Or go to: netscape http://www.dl.ac.uk/CCP/CCP4/main.html"
    echo "about getting the CCP4 program suite."
    echo "and run $0 again."
    
    echo "If you have Red Hat Linux, you can get ccp4 by typing:"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-lib-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-progs-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-etc-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-examples-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-doc-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-manual-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-html-3.5.1-5.i386.rpm"
    # (an unreachable "set CCP4_LIB" used to follow this exit)
    exit 9
endif
setenv CCP4_OPEN       UNKNOWN
# make sure we can write to scratch directories
if(! $?CCP4_SCR) setenv CCP4_SCR .
if(! $?BINSORT_SCR) setenv BINSORT_SCR .

# probe each scratch directory by creating and removing a file named
# after our process id
touch ${CCP4_SCR}/this$$ >& /dev/null
if($status) then
    # safest to do this
    setenv CCP4_SCR .
endif
rm -f ${CCP4_SCR}/this$$ >& /dev/null

touch ${BINSORT_SCR}/this$$ >& /dev/null
if($status) then
    # safest to do this
    setenv BINSORT_SCR .
endif
rm -f ${BINSORT_SCR}/this$$ >& /dev/null


# the working directory must be writable: probe it by creating (and later
# deleting) a scratch file named after our process id
touch ./this$$ >& /dev/null
if($status) then
    # first attempt failed: grant ourselves write permission and retry
    chmod u+w . >& /dev/null
    touch ./this$$ >& /dev/null
    if(! $status) then
	# the chmod worked; tell the user what we changed
	echo "Had to make current directory writable:"
	echo "chmod u+w ."
    else
	# still read-only, nothing more we can do from here
	echo "ERROR! We can't write to this directory!"
	pwd
	echo "Please cd to the place you want to process your data, and"
	echo "then run $0 again."
	exit 9
    endif
    rm -f ./this$$ >& /dev/null
endif
rm -f ./this$$ >& /dev/null

# never leave core files behind
limit coredumpsize 0

# when standard output is not a terminal nobody is there to answer
# questions (Q&A would stop the process cold), so flag unattended
# operation with AUTO and log how and where we were started
test -t 1
if($status != 0) then
    set AUTO
    echo "output is not a terminal."
    echo "Elves will answer their own questions."
    echo ""
    echo "$0 $*"
    echo "on "`hostname -s`" at "`date +"%T %D"`
    echo "in "`pwd`
endif

###############################################################################

 #####   ######  ######    ##    #    #  #        #####   ####
 #    #  #       #        #  #   #    #  #          #    #
 #    #  #####   #####   #    #  #    #  #          #     ####
 #    #  #       #       ######  #    #  #          #         #
 #    #  #       #       #    #  #    #  #          #    #    #
 #####   ######  #       #    #   ####   ######     #     ####

###############################################################################
#
#   Default values for all parameters
#
###############################################################################
#
# crystal variables
# ("unknown" or empty until supplied by the user or read from the data)
set SG         = "unknown"
set CELL       = "unknown"
set MASS       = "unknown"
set CHAINS     = ""
set Vm         = "2.4"
set METAL      = ""
set Ee         = ""
set SITES      = ""
set hiRES      = ""
set loRES      = "1000"
#
# misc program settings
set TITLE         = "Scaler Elves"
set defaultSDCORR = "1.3 0 0.03"
set SPACING       = 10
set CYCLES        = 100
set EXTRA_ARGS    = ""
set USE_VRSET     = "#"
set SCALING       = batch
set BFACTOR       = batch
set ref_batch	  = 1
# wavelength bookkeeping starts out empty
set wavenames
set wavelengths
set wave_reference
#
# Files generated by this script
set SCRIPT_dir = ./scripts
set LOG_dir    = ./logs
set TEMP_dir   = $CCP4_SCR

set MTZ_dir    = ./mtz
set MAP_dir    = ./maps
set SHARP_dir  = ./mtz
set SOLVE_dir  = ./SOLVE
set SHELX_dir  = ./SHELX
set XPLOR_dir  = ./XPLOR

# prefixes
set RUNFILE   = ./runlist.txt
set RULESFILE = ./rules.txt
# NOTE(review): this basename is not used before temp is cleared below
set temp = `basename $0`
set logfile   = ${LOG_dir}/
#set tempfile   = $CCP4_SCR/tempfile
# every temporary file name is this prefix (which includes our process id)
# plus a suffix
set tempfile   = "${CCP4_SCR}/scaler_temp$$."

# customize file names? 
set rawMTZ     = ${MTZ_dir}/rawdata.mtz
set refMTZ     = ${MTZ_dir}/reference.mtz
set sortMTZ    = ${MTZ_dir}/sorted_ref.mtz
set rscaleMTZ  = ${MTZ_dir}/rough_scaled.mtz
set lscaleMTZ  = ${MTZ_dir}/localscaled.mtz
set finalMTZ   = ${MTZ_dir}/all.mtz

set sortLOG    = ${LOG_dir}/sorting.log
set refLOG     = ${LOG_dir}/make_reference_set.log
set rscaleLOG  = ${LOG_dir}/rough_scale.log
set lscaleLOG  = ${LOG_dir}/localscale.log
set finalLOG   = ${LOG_dir}/scaleit.log
set mergeLOG   = ${LOG_dir}/merge_
set extractLOG = ${LOG_dir}/extract_
set lastLOG    = ${rscaleLOG}

# might as well give these sensible values
set refMTZset  = "IMEAN SIGIMEAN"
set freeR_source = "5%"

# random internal settings
set MAXLINE    = 500
set PROMPT     = "S. Elves-> "
# terminal bell (octal 07); generated with the validated nawk alias, like
# the ANG and DEG symbols, rather than whatever "awk" happens to be in PATH
set BELL       = `echo "" | nawk 'BEGIN{printf "\07"}'`
# flags and scratch variables (defined, but empty)
set FIRSTIME
set NEW
set temp
set input
set info
set in
set baddirs
set otherSGs
set FRUGAL
set FIX_PROBLEMS

# clear any old temporary files
# (everything that starts with this process's ${tempfile} prefix)
rm -f ${tempfile}* >& /dev/null
#rm -f $RUNFILE
# accumulate rules?
# (for now the rules file is always started from scratch)
rm -f $RULESFILE

# sneaky: try to get SDCORR from Wedger runs
# Collect the SDCORR cards of every */*/merge.com, average the first and
# last value of all two- or three-valued cards and the middle value of the
# three-valued ones, and use the result as the default when exactly three
# numbers come out.
egrep -hi "^SDCORR" */*/merge.com |&\
nawk 'toupper($0) ~ /^SDCORR/{print $2, $3, $4}' |\
nawk 'NF==2 || NF==3{sdfac+=$1;sdadd+=$NF; ++count} \
      NF==3{sdB+=$2;++countB} END{if(countB)sdB/=countB; \
      if(count) printf "%.2f %.1f %.2f\n", sdfac/count, sdB, sdadd/count}' |\
cat >! ${tempfile}
set temp = `nawk 'NF==3' ${tempfile}`
if($#temp == 3) set defaultSDCORR = "$temp"
rm -f ${tempfile} >& /dev/null


# jump to the section that writes out the helper awk scripts listed below;
# it is expected to return to the label that follows
goto Unwrap_Awk_Scripts
# create ${tempfile}sequencer.awk
# create ${tempfile}elements.awk
# create ${tempfile}ginger.awk
# create ${tempfile}labler.awk
# create ${tempfile}parser.awk
Return_Unwrap_Awk_Scripts:



CommandLine:
###############################################################################
#
#   Process command line
#
###############################################################################
# Writes the command-line words to ${tempfile}input, then jumps to Help,
# Pickup or Gather depending on what was asked for.

# a very long command line makes the word-by-word copy below slow
if($#argv > 50) then
    echo "Long command line... one moment."
endif

# copy the arguments into a file, one word per line, so that no single
# shell word grows too long ("word too long" errors)
echo -n "" >! ${tempfile}input
set i = 0
while($i < $#argv)
    @ i++
    echo "$argv[$i]" >> ${tempfile}input
end

# re-pack the words into lines of roughly $MAXLINE characters, each word
# padded with a blank on either side
nawk -v L=$MAXLINE '{l+=length($0)+2; if(l>L){print ""; l=0}; printf " %s ", $0}' ${tempfile}input >! ${tempfile}longlines
mv ${tempfile}longlines ${tempfile}input

# nothing on the command line at all: explain ourselves
if($#argv == 0) goto Help

# terminate the last packed line
echo "" >> ${tempfile}input

# cries for help: the word "help", or a trailing question mark
grep " help " ${tempfile}input >& /dev/null
if(! $status) goto Help
if("$argv[$#argv]" =~ *"?") goto Help

# "pick up" / "-pickup": continue from earlier scripts
egrep " pick up | -pickup " ${tempfile}input >& /dev/null
if(! $status) unset NEW

# "new" / "-new": start fresh (this wins over a pickup request)
egrep " new | -new " ${tempfile}input >& /dev/null
if(! $status) set NEW

if($?NEW) goto Gather
goto Pickup


Help:
################################################################################

 #    #  ######  #       #####
 #    #  #       #       #    #
 ######  #####   #       #    #
 #    #  #       #       #####
 #    #  #       #       #
 #    #  ######  ######  #

################################################################################
#	Online help routine
#
#	Prints the usage summary, asks whether there is anything else, and
#	hands whatever the user types on to the pickup/gather stages in $input
################################################################################
cat << EOF

usage: $0 [sentence about your project]

where:
[sentence about your project]	is something like:
files called */raw.mtz are 2A data of a 22kD protein with six Se sites per chain

you can also keep all this information in a file, and give that file
to $0 instead of a sentence

EOF

# insert online help code here

# follow-up question; an empty reply leaves $input empty
set temp = "no"
set input = ""
echo "Any other questions? [$temp]"
echo -n "$PROMPT"
echo -n "$BELL"
if(! $?AUTO) then
    set in = "$<"
    if("$in" != "") set input = "$in"    
else
    # unattended: show the default as if it had been typed
    echo "$temp"
endif

# make sure the pickup search below is not skipped
unset NEW
goto Pickup

Pickup:
if($?NEW) goto Gather
###############################################################################

 #####      #     ####   #    #  #    #  #####
 #    #     #    #    #  #   #   #    #  #    #
 #    #     #    #       ####    #    #  #    #
 #####      #    #       #  #    #    #  #####
 #          #    #    #  #   #   #    #  #
 #          #     ####   #    #   ####   #

###############################################################################
#
#   "Pick-up" info from last scala scripts
#
#   Scans the small files in ./ and ${SCRIPT_dir}/ for scala keyword cards
#   and appends the ones found to $RULESFILE.  An interrupt (onintr) skips
#   the rest of the search.
#
###############################################################################
# collect a list of likely "pickup" files
echo -n "looking for scala scripts ."
onintr SkipPickup
echo "" >! ${tempfile}sources
echo "" >! ${tempfile}pickuplines
unset USEFUL

foreach place ( ./ ${SCRIPT_dir}/  )
    test -d $place
    if(! $status) then
	# get small files in this directory (size column below 100000)
	ls -lnrt ${place} |& nawk -v dir=$place '$5 < 100000{print dir $NF}' |&\
	cat >> ${tempfile}sources
    endif

    # entertainment
    echo -n "."
end

# clean up filename list (keep single-word lines only)
mv ${tempfile}sources ${tempfile}
nawk 'NF==1' ${tempfile} >! ${tempfile}sources

# go through files, looking for scala cards
set files = `cat ${tempfile}sources | wc -l`
set i = 1
while( $i <= $files )
    # get the ith filename from the top of the list
    set file = `head -$i ${tempfile}sources | tail -1`

    # start every file with a clean slate: without this, a listed name that
    # does not exist would inherit the cards of the previous file and be
    # reported as if it contained them
    rm -f ${tempfile}cards >& /dev/null

    if(-e "$file") then
	# grab probable scala cards (except ones we handle internally);
	# for a repeated keyword only the last card in the file is kept
	nawk '/^scala/,/^EOF/{print toupper($0)}' "$file" |&\
	egrep "^SDCORR|^INTENS|^PARTIALS|^REJECT|^SMOOTH|^DAMP|^FILTER|^BINS|^PRINT|^TIE|^ANALYZE" |\
	grep -v "ANOM" |\
	nawk '! /^#/{card[$1]=$0} END{for(x in card) print card[x]}' |\
	cat >! ${tempfile}cards
    endif

    # see if we got anything
    test -s ${tempfile}cards
    if(! $status) then
	echo " $file"
	cat ${tempfile}cards >> ${tempfile}pickuplines
	
	set USEFUL
	set CHECK_SCALA_CARDS
    endif

    # entertainment
    echo -n "."

    @ i = ( $i + 1 )
end

# simplify, uniquify and process the "pickup" lines
# (drop everything from the first word starting with "#" on each line)
cat ${tempfile}pickuplines |\
nawk 'NF>1{for(i=1;i<=NF && ($i !~ /^#/);++i){printf "%s ", $i}; print ""}' |\
sort -u >! ${tempfile}lines

# update rules file
cat ${tempfile}lines >> $RULESFILE

if(! $?USEFUL) then
    echo -n "didn't find anything."
else
    set FOUND_SOMETHING
endif
echo ""

SkipPickup:
onintr

if(! $?FOUND_SOMETHING) then
    # message if we skipped ahead
    echo "skipped."
    echo "use:"
    echo " $0 $* -new"
    echo " to prevent this search alltogether."
endif

#clean up temporary files
rm -f ${tempfile}lines       >& /dev/null
rm -f ${tempfile}cards       >& /dev/null
rm -f ${tempfile}pickuplines >& /dev/null
rm -f ${tempfile}sources     >& /dev/null


goto Gather
# appease onintr bugs
# (these unmatched keywords are deliberate and never executed; keep them)
end
endif
end
endif



Gather:
onintr
###############################################################################

  ####     ##     #####  #    #  ######  #####
 #    #   #  #      #    #    #  #       #    #
 #       #    #     #    ######  #####   #    #
 #  ###  ######     #    #    #  #       #####
 #    #  #    #     #    #    #  #       #   #
  ####   #    #     #    #    #  ######  #    #

###############################################################################
#
#   Gather info indicated in $input and/or ${tempfile}input
#
###############################################################################
# forget the previous mtz and elvish lists, then make sure that every work
# file read further down exists (possibly empty)
rm -f ${tempfile}elvish ${tempfile}mtzs >& /dev/null
touch ${tempfile}mtzs ${tempfile}xfiles ${tempfile}batches ${tempfile}elvish ${tempfile}sequence ${tempfile}input $RULESFILE $RUNFILE
set understood = ""



###############################################################################
# convert all inputs into a file
# (a non-empty $input replaces whatever the input file held)
set input = ( $input )
if($#input > 0) then
    echo " $input " >! ${tempfile}input
endif

# Backup question: ask the user when there is no input at all
set temp = `wc -l < ${tempfile}input`
if($temp == 0) then
    # default answer is "nothing"
    set temp = "nothing"
    echo "What's wrong? [$temp]"
    echo -n "$PROMPT"
    echo -n "$BELL"
    if(! $?AUTO) then
        set in = "$<"
        if("$in" != "") set temp = "$in"
    else
        # unattended: show the default as if it had been typed
        echo "$temp"
    endif
    if("$temp" == "nothing") goto RemoveStuff
    echo "$temp" >! ${tempfile}input
endif



###############################################################################
#
#	look for "help me" phrases
#
###############################################################################
# the word "help", a line opening with a question word, or a question mark
# anywhere in the input sends us to the help routine
egrep -i "help|^ what |^ how |^ when |^ where |^ why|\?" ${tempfile}input >& /dev/null
if($status == 0) goto Help




###############################################################################
#
#	look for Scaler-specific phrases (program variables, etc.)
#
###############################################################################

RemoveStuff:
##############################################
#   Remove mtzs or scala cards	 	     #
##############################################
# When the input asks to remove/exclude something (and this is not the
# first pass, i.e. FIRSTIME is unset), work out which mtz files,
# wavelengths or scala keywords are meant, confirm the file list with the
# user, and strip the rejected files from the batch, mtz and input lists.
#
# look for "remove" phrases
egrep -i " remove| except| exclud| leave out| don't use| do not use" ${tempfile}input >& /dev/null
if((! $status)&&(! $?FIRSTIME)) then

    # recover/add list of MTZs (for identification):
    # one line per file with its total frame count, sorted by that count.
    # "-k 2" replaces the obsolete "+1" key syntax, which current sort
    # implementations reject (the error text would have been appended to
    # the list through ">>&").
    touch ${tempfile}mtzs
    cat $RUNFILE |&\
    nawk '$3=="+"{print $NF, ($6-$4)+1}' |&\
    nawk '{count[$1] += $2} END{for(mtz in count) print mtz, count[mtz]}' |&\
    sort -n -k 2 >>& ${tempfile}mtzs
    
    # get user-specified things to remove: the text following the first
    # trigger phrase on each line.  The trigger list mirrors the egrep test
    # above and is matched case-insensitively, as egrep -i does; the
    # "exclud[a-z]*" form swallows the whole verb (exclude, excluding, ...)
    # so that no stray word ending is left in front of the names.
    set badmtzs = ""
    set removed = ""
    cat ${tempfile}input |\
    nawk -v key=" remove| except| exclud[a-z]*| leave out| don't use| do not use" 'tolower($0) ~ key{\
           pos=match(tolower($0), key); print substr($0, pos+RLENGTH)}' |\
    cat >! ${tempfile}subinput
    
    # limit the amount of text turned into shell words
    set subinput = `cat ${tempfile}subinput |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
    rm -f ${tempfile}subinput >& /dev/null
    foreach word ( $subinput )
	if(("$word" != "and")&&("$word" != '&')&&("$word" != ',')) then
	    unset USEFUL
	    
	    # see if it's an mtz/file
	    nawk -v check=$word '$1 ~ check {print $1}' ${tempfile}mtzs |&\
	    cat >&! ${tempfile}remove
	    set temp = `cat ${tempfile}remove`
	    rm -f ${tempfile}remove
	    if("$temp" != "") then
		# mtz was in our list of used mtzs
		set badmtzs = "$badmtzs $temp"

		set USEFUL
		set removed = "$removed $temp"
	    endif
	    
	    # see if word is a wavelength
	    foreach wave ( $wavenames )
		if("$word" == "$wave") then
		    # get all mtzs from this wavelength
		    cat $RUNFILE |\
		    nawk -v wave=$wave '$2=="wavelength"{p=0} \
		    $2=="wavelength" && $3==wave {p=1} \
		    p==1 && $3=="+"{print $NF}' |\
		    cat >! ${tempfile}remove
		    set temp = `cat ${tempfile}remove`
		    rm -f ${tempfile}remove
		    
		    set badmtzs = "$badmtzs $temp"
		    
		    set USEFUL
		    set removed = "$removed $wave"
		endif
	    end
	    
	    # see if word is a scala keyword
	    grep -i "$word" $RULESFILE >& /dev/null
	    if(! $status) then
		# remove this keyword from the rules file
		echo "removing:"
		grep -i "$word" $RULESFILE 
		echo "from the keyword list."
		
		grep -iv "$word" $RULESFILE >! ${tempfile}rules
		mv ${tempfile}rules $RULESFILE
		
		set USEFUL
		set removed = "$removed $word"
	    endif
	    
	    # break on first non-understood word
	    if(! $?USEFUL) break
	endif
    end
    
    # see if we got any mtzs (this also trims surplus blanks)
    set badmtzs = `echo "$badmtzs"`
    
    # default to removing mtz files
    if(("$removed" == "")&&("$badmtzs" == "")) set badmtzs = "none of them"

    if("$badmtzs" != "") then
	# confirm before removing
	cat ${tempfile}mtzs |\
	nawk '{print $1, "has", $2, "frames"}'

	set temp = "$badmtzs"
	echo "Which files should we leave out? [$temp]"
	echo -n "$PROMPT"
	echo -n "$BELL"
	if($?AUTO) then
	    echo "$temp"
	else
	    set in = "$<"
	    if("$in" == "none") then
		set badmtzs = ""
		set in = ""
	    endif
	    if("$in" != "") then
		# the reply replaces our guess: keep every known file whose
		# name matches one of the words typed
		echo "" >! ${tempfile}remove
		foreach temp ( $in )
		    # remove each of these items
		    nawk '{print $1}' ${tempfile}mtzs |\
		      grep "$temp" >> ${tempfile}remove
		end
		set badmtzs = `cat ${tempfile}remove`
		rm -f ${tempfile}remove
		
		if("$badmtzs" == "") echo "Huh? "
	    endif
	endif
	
	# remove requested files from the list
	# (from the batch list, the mtz list and the words of the input)
	foreach file ( $badmtzs )
	    mv ${tempfile}batches ${tempfile}
	    cat ${tempfile} |\
	    nawk -v remove=$file '$2 != remove {print}' |\
	    sort >! ${tempfile}batches

	    mv ${tempfile}mtzs ${tempfile}
	    cat ${tempfile} |\
	    nawk -v remove=$file '$1 != remove {print $1}' |\
	    sort >! ${tempfile}mtzs

	    mv ${tempfile}input ${tempfile}
	    cat ${tempfile} |\
	    nawk -v remove=$file '{for(i=1;i<=NF;++i){if($i != remove) printf " %s ", $i}; print ""}' |\
	    cat >! ${tempfile}input
	end
    
	# forget old batch list now
#	rm -f ${tempfile}batches >& /dev/null
    endif

    set understood = "$understood remove"

    # probably don't mean anything else
#    goto Calculate
endif


EditWaves:
###############################################################################
#
#	Change wavelength labels
#
#	For every "change/label/name/wave" request in the input: find the
#	wavelength meant (by its current label or by its value), find or ask
#	for the new label, refuse duplicates, and update $wavenames (and
#	$wave_reference when the reference set itself was renamed).
#
###############################################################################
egrep -i "change| label|name| wave" ${tempfile}input >& /dev/null
if(! $status) then
    # keep the text following the first trigger word of each line; matched
    # case-insensitively, like the egrep test above
    cat ${tempfile}input |\
    nawk -v key="change| label|name| wave" 'tolower($0) ~ key{\
           pos=match(tolower($0), key); print substr($0, pos+RLENGTH)}' |\
    cat >! ${tempfile}subinput
    
    # limit the amount of text turned into shell words
    set subinput = `cat ${tempfile}subinput |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
    rm -f ${tempfile}subinput >& /dev/null
    
    while("$subinput" != "")
	set lambda = ""
	set newname = ""
	set oldname = ""
	
	# figure out wavelength to be renamed
	foreach word ( $subinput )
	    foreach wave ( $wavenames )
		if("$word" == "$wave") then
		    # translate the label into its wavelength value
		    set lambda = `echo $wavenames $wavelengths | nawk -v label="$wave" '{for(i=1;i<=NF;++i){if($i==label) print $(i+NF/2)}}'`
		endif
	    end
	    foreach wl ( $wavelengths )
		if("$word" == "$wl") then
		    set lambda = "$wl"
		endif
	    end
	    if("$lambda" != "") break
	end
	# "$word" should be first word of input after "rename" directive

	# Nothing in the remaining text names a wavelength, so there is
	# nothing left to rename.  Without this exit a remainder whose last
	# word contains a trigger (e.g. "rename") is never shortened and the
	# loop spins forever.
	if("$lambda" == "") break
	
	# remember where we left off, cut off used portion of "$subinput" string
	set subinput = `echo $subinput | nawk -v key="$word" '{print substr($0, index($0, key))}'`

	# get current name of this wavelength
	set oldname = `echo $wavelengths $wavenames | nawk -v label="$lambda" '{for(i=1;i<=NF;++i){if($i==label) print $(i+NF/2)}}'`

	if("$oldname" != "") then
	    # look for next word beginning in "F" after old name
	    set newname = `cat ${tempfile}input | nawk -v key="$word" '{print substr($0, index($0, key)+length(key))}'`
	    set newname = `echo $newname | nawk -v key="F" '{print substr($0, index($0, key))}'`
	    set newname = `echo $newname | nawk '{print $1}'`
	
	    # prevent duplicate wavelength labels
	    foreach wave ( $wavenames )
		if("$newname" == "$wave") set newname = ""
	    end

	    if("$newname" == "") then
		# no apparent user-specified new name
	
		# default to old label
		set temp = "$oldname"
		echo "What name shall we use for data collected at $lambda $ANG ? [$temp]"
		echo -n "$PROMPT"
		echo -n "$BELL"
		if($?AUTO) then
		    echo "$temp"
		else
		    set in = "$<"
		    if("$in" != "") set temp = "$in"
		endif
		set newname = "$temp"
	    endif
	
	    # final check for appropriate label name
	    foreach wave ( $wavenames )
		# prevent duplicate wavelength names
		if(("$newname" == "$wave")&&("$newname" != "$oldname")) then
		    set wl = `echo $wavenames $wavelengths | nawk -v label="$wave" '{for(i=1;i<=NF;++i){if($i==label) print $(i+NF/2)}}'`
		    echo "$newname already refers to $wl $ANG."
		    set newname = ""
		endif
	    end
	
	    if("$newname" == "") then
		echo "Sorry, but "\"$in\"" is not a good lable. "
		echo "if you want to rename a wavelength, please say something like: "
		echo \""label $oldname as Fnew"\"
		echo ""
		goto EditWaves
	    endif
	
	    # change the wavelength label
	    # (rebuild the label list with the new name in the renamed slot)
	    set temp = ""
	    set i = 1
	    while($i <= $#wavelengths)
		if("$lambda" == "$wavelengths[$i]") then
		    set temp = "$temp $newname"
		else
		    set temp = "$temp $wavenames[$i]"
		endif
		@ i = ( $i + 1 )
	    end
	    set wavenames = `echo "$temp"`
	    # update reference set
	    if("$wave_reference" == "$oldname") set wave_reference = "$newname"

	    set understood = "$understood rename $oldname"
	endif
	
	# now cut off leading text in input string
	# (continue from the next trigger word, if there is one)
	set subinput = `echo "$subinput" | nawk -v key="change| label|name| wave" 'tolower($0) ~ key{print substr($0, match(tolower($0), key))}'`
	if("$subinput" == "") then
	    break
	endif
    end
    
    unset newname
    unset oldname

    # user probably doesn't want anything else
#    goto Calculate
endif



ChangeReference:
###############################################################################
#
#	allow user to change reference wavelength
#
#	Runs when the input mentions " reference" and wavelength labels are
#	known.  The first label found in the input becomes $wave_reference;
#	otherwise the user is asked.  The answer "all" selects the special
#	reference "alldata".
#
###############################################################################
egrep -i " reference" ${tempfile}input >& /dev/null
if((! $status)&&("$wavenames" != "")) then
#if((! $status)&&(! $?FIRSTIME)) then
    # user mentioned reference wavelength
    set name = ""

    # check for wavelength names mentioned
    foreach wave ( $wavenames )
	grep " $wave " ${tempfile}input >& /dev/null
	if(! $status) then
	    # select this wavelength name?
	    set name = "$wave"
	    
	    break
	endif
    end
    set understood = "$understood ref $name"

    if("$name" == "") then
	# couldn't find a label
    
	set temp = "$wave_reference"
	echo "Which of $wavenames do you want to use as a reference set? [$temp]"
	echo -n "$PROMPT"
	echo -n "$BELL"
	if($?AUTO) then
	    echo "$temp"
	else
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	endif
	
	# update user-input reference
	set name = "$temp"
    endif
    
    # final check before changing reference:
    # $temp stays empty unless $name is one of the known labels
    set temp = ""
    foreach wave ( $wavenames )
	if("$name" == "$wave") set temp = "$wave"
    end
    
    if(("$temp" == "")&&("$name" == "all")) then
	# trick: use all data as a reference
	# ($name is emptied so that the assignment below is skipped)
	set temp = "$name"
	set name = ""
	set wave_reference = "alldata"
    endif
    if("$temp" == "") then
	echo "Sorry, but "\"$in\"" doesn't tell us anything. "
	echo "$wave_reference is still the reference wavelength. "
	echo "if you want to change the reference wavelength, please say something like: "
	echo \""use $wavenames[$#wavenames] as the reference"\"
	echo ""
	# ask again, but only when somebody can answer: an unattended run
	# would feed the same unusable default back in forever
	if(! $?AUTO) goto ChangeReference
    else if("$name" != "") then
	# $name is indeed, part of $wavenames
	# (skipped for "all", which would otherwise overwrite the "alldata"
	#  reference chosen above with an empty string)
	set wave_reference = "$name"
    endif
    
    unset name
    
    # jump over wave editor
#    goto Calculate
endif




ScalaCards:
###############################################################################
#
#	pick out scala keyword cards typed directly into the input
#
###############################################################################
# drop leading whitespace from every input line, then keep only the lines
# that start with one of the recognized scala keywords
nawk '{print substr($0, index($0, $1))}' ${tempfile}input |\
egrep -i "^SDCORR|^INTENS|^PARTIALS|^REJECT|^SMOOTH|^DAMP|^FILTER|^BINS|^PRINT|^TIE|^SKIP|^ANALYZE" >! ${tempfile}cards

set temp = `wc -l < ${tempfile}cards`
if($temp != 0) then
    # store the cards, upper-cased, with the other scaling rules
    nawk '{print toupper($0)}' ${tempfile}cards >> $RULESFILE

    set understood = "card"
    set CHECK_SCALA_CARDS

    # probably don't mean anything else
#    goto Calculate
endif

# the scratch list is not needed any more, whether or not it had cards in it
rm -f ${tempfile}cards >& /dev/null




GatherFiles:
###############################################################################
#
#	gather file info (mtzs and text)
#
#	Walks through the saved input one whitespace-separated word at a time.
#	Each word is tried, in order, as: an existing *.mtz file, a denzo/york
#	file, a directory, a partial path (directory + filename prefix), and
#	finally a small text file to be mined for parameters.  Results go to:
#	    ${tempfile}mtzs      - mtz file names
#	    ${tempfile}xfiles    - denzo file names
#	    ${tempfile}elvish    - SYMM/CELL/RESO/... lines found by ginger.awk
#	    ${tempfile}sequence  - output of sequencer.awk
#	    $RULESFILE           - scala cards
#	USEFUL is set as soon as one of the tests recognizes the word.
#
###############################################################################
set words = `cat ${tempfile}input | wc -w`
set word = 0
# $word is incremented at the top of the body, so "<" visits words 1..$words
# (with "<=" there would be one extra pass with an empty $arg)
while($word < $words)
    # here we want to examine input one word at a time
    @ word = ( $word + 1 )
    set arg = `nawk -v word=$word '{for(i=1;i<=NF;++i){++w; if(w==word) print $i}}' ${tempfile}input`
    
    # reset flag for next pass
    unset USEFUL

    # see if this is an mtz file
    if("$arg" =~ *.mtz) then
	if(-e "$arg") then
	    # add it to the list (check later)
	    echo "$arg" >> ${tempfile}mtzs
	    set USEFUL
	    
	    set understood = "$understood $arg"
	else
	    echo "WARNING: $arg does not exist! "
	endif
    endif
    
    # see if this is a denzo file
    if((-e "$arg")&&(! $?USEFUL)) then
	# filter for denzo format (only the first 10 lines are examined)
	head -10 "$arg" | nawk -f ${tempfile}x2york.awk |\
	cat >! ${tempfile}york
	
	grep HEADER ${tempfile}york >& /dev/null
	if(! $status) then
	    # a bird in hand...
	    # look for more files like this? 
	    
	    # check for denzo vs york format
	    grep "WARNING" ${tempfile}york >& /dev/null
	    if(! $status) then
		# this was not a york file
		set NOT_YORK
	    endif
	    
	    # add this denzo file to the list
	    echo "$arg" >> ${tempfile}xfiles
	    set USEFUL
 	    
	    set understood = "$understood $arg"
	endif
	rm -f ${tempfile}york
    endif
    
    # check for directories
    if((-e "$arg")&&(! $?USEFUL)) then
	# make sure this is a directory
	# (mtzdir stays empty unless the long listing starts with "d")
	set mtzdir = `ls -lLd $arg |& nawk '/^d/{print $NF "/"}' |& nawk 'BEGIN{RS="/";ORS="/"} (NF>0 || NR==1)' |& head -1`

	# add contents of this directory to our master list of mtzs:
	if(-e "$mtzdir") then
	    echo -n "Looking in $mtzdir "
	
	    # regular files ending in mtz that are bigger than 1000 bytes
	    ls -lnL $mtzdir |\
	    nawk '/.mtz$/ && /^\-/ && $5+0 > 1000 {print $NF}' |\
	    nawk -v dir=$mtzdir '{print dir $1}' |\
	    cat >! ${tempfile}
	
	    set temp = `cat ${tempfile} | wc -l`
	    if("$temp" > 0) then
		cat ${tempfile}  >> ${tempfile}mtzs
		set USEFUL	    

		set understood = "$understood $mtzdir"
	    else
		echo -n ".. no mtz files.."
		rm -f ${tempfile}
	    	
		# list directory again, this time filtering for denzo files
		ls -lnL $mtzdir |\
		nawk '$NF ~ /[0-9][0-9][0-9]./ && /^\-/ && $5+0 > 1000 {print $NF}' |\
		nawk -v dir=$mtzdir '{print dir $1}' |\
		nawk '! system("head -10 " $1){print "sector", $1}' |\
		nawk -f ${tempfile}x2york.awk |\
		cat >! ${tempfile}york
		
		echo -n "."
		
		set temp = `grep -c sector ${tempfile}york`
		if("$temp" > 0) then
		    echo -n ".. found $temp denzo files."
		    
		    # add these denzo files to the list
		    nawk '/^sector/{print $2}' ${tempfile}york >> ${tempfile}xfiles
		    set USEFUL
	    
		    # check for denzo vs york format
		    grep "WARNING" ${tempfile}york >& /dev/null
		    if(! $status) then
			# this was not a york file
			set NOT_YORK
		    endif
	    
		    set understood = "$understood $arg"
		else
		    echo -n ".. no denzo files either."
		endif
		rm -f ${tempfile}york
	    endif
	    echo ""
	endif
    endif
    
    # may be a partial filename
    if(("$arg" =~ */*)&&(! $?USEFUL)) then
	# split the word into a directory and a filename prefix
	set mtzdir = `dirname $arg |& nawk 'BEGIN{RS="/";ORS="/"} (NF>0 || NR==1)' |& head -1`
	set mtzdir = "${mtzdir}/"
	set prefix = `basename $arg`

	# look in this directory for mtzs:
	ls -lnL $mtzdir |&\
	nawk '/.mtz$/ && /^\-/ && $5+0 > 1000 {print $NF}' |\
	egrep "^$prefix" |&\
	nawk -v dir=$mtzdir '{print dir $1}' |&\
	cat >! ${tempfile}
	
	set number_of_mtzs = `cat ${tempfile} | wc -l`
	if("$number_of_mtzs" > 0) then
	    echo "Looking at mtzs named ${prefix}* in $mtzdir "
	    cat ${tempfile} >> ${tempfile}mtzs
	    
	    set understood = "$understood $mtzdir"
	    set USEFUL
	else
	    rm -f ${tempfile}
	    

	    # list directory again, this time filtering for denzo files
	    ls -lnL $mtzdir |\
	    nawk '$NF ~ /[0-9][0-9][0-9]./ && /^\-/ && $5+0 > 1000 {print $NF}' |\
	    egrep "^$prefix" |&\
	    nawk -v dir=$mtzdir '{print dir $1}' |\
	    nawk '! system("head -10 " $1){print "sector", $1}' |\
	    nawk -f ${tempfile}x2york.awk |\
	    cat >! ${tempfile}york
	    
	    set temp = `grep -c sector ${tempfile}york`
	    if("$temp" > 0) then
		echo "Found $temp denzo files named ${mtzdir}${prefix}* "

		# filenames were printed after "sector"
		nawk '/^sector/{print $2}' ${tempfile}york >> ${tempfile}xfiles
		set USEFUL
		
		# check for denzo vs york format
		grep "WARNING" ${tempfile}york >& /dev/null
		if(! $status) then
		    # this was not a york file
		    set NOT_YORK
		endif

		set understood = "$understood $arg"
	    endif
	    rm -f ${tempfile}york
	endif
    endif


    #########################################################################
    #
    #	    Examine Text files named on command line
    #
    #########################################################################
    # check for a flat, text file containing something useful
    # (regular files under 100000 bytes, or any size if named *seq / *pdb)
    set file = `ls -Lld $arg |& nawk '/^\-/ && ($5+0 < 100000 || /.seq$/ || /.pdb$/) {print $NF}'`
    if((-e "$file")&&(! $?USEFUL)) then
	
	# check for re-edited $RUNFILE
	set temp = `nawk '$2~/^wave/ && $4==":" && $6=="A" && $NF=="eV"' $file`
	if("$temp" != "") then
	    echo "using run definitions in $file"
	    # take the labels from the file that was just recognized
	    # (not from $RUNFILE, which may be stale or not written yet)
	    set wavenames   = `nawk '$2=="wavelength"{print $3}' $file`
	    set wavelengths = `nawk '$2=="wavelength"{print $5}' $file`
	    set USEFUL
	    set USER_RUNFILE = $file
	    # don't re-define these runs
	    set FINAL_JUMPS
	endif
	
	# use Ginger to look for parameters interesting to Scaler
	cat $file |\
	nawk -f ${tempfile}ginger.awk |\
	nawk '/^SYMM|^CELL|^RESO|^MASS|^ASU|^VM|^SITES/ && NF > 1' |\
	cat >! ${tempfile}
	set temp = `cat $tempfile | wc -l`
	if($temp > 0) then
	    if($?FIRSTIME) echo "Reading parameters in $arg"
	    cat ${tempfile} >> ${tempfile}elvish
	    set USEFUL
	endif
	
	# gather up parameters interesting to scala
	cat $file |\
	nawk 'NF>1 && $1 !~ /^#/{print toupper($0)}' |\
	egrep "^SDCORR|^INTENS|^PARTIALS|^REJECT|^ANOM|^SMOOTH|^DAMP|^FILTER|^BINS|^TITLE|^PRINT|^TIE|^SKIP|^ANALYZE" |\
	cat >! ${tempfile}
	set temp = `cat $tempfile | wc -l`
	if($temp > 0) then
	    if($?FIRSTIME) echo "Reading cards in $arg"
	    cat ${tempfile} >> $RULESFILE
	    set USEFUL
	    set CHECK_SCALA_CARDS
	endif
	
	# get mtz files mentioned in this file
	# (commas, dollar signs and quotes are turned into word separators)
	cat $file |\
	nawk '{for(i=1;i<=length($0);++i){c=substr($0,i,1);\
	     if(c ~ /[\054\044\042\047]/) c= " "; printf "%s", c}; print " "}' |\
	nawk 'BEGIN{RS=" "} NF==1' |\
	nawk '/.mtz$/ {if(! system("test -r " $1 )) print $1}' |\
	cat >! ${tempfile}
	
	set temp = `cat $tempfile | wc -l`
	if($temp > 0) then
	    echo "$temp mtzs from $file"
	    cat ${tempfile} >> ${tempfile}mtzs
	    set USEFUL
	endif
	
	# get denzo files mentioned in this file
	cat $file |\
	nawk '{for(i=1;i<=length($0);++i){c=substr($0,i,1);\
	     if(c ~ /[\054\044\042\047]/) c= " "; printf "%s", c}; print " "}' |&\
	nawk 'BEGIN{RS=" "} NF==1' |\
	nawk '/[0-9][0-9][0-9]./{if(! system("head -10 " $1)) print "sector", $1}' |&\
	nawk -f ${tempfile}x2york.awk |&\
	cat >&! ${tempfile}york
	
	set temp = `grep -c sector ${tempfile}york`
	if($temp > 0) then
	    echo "$temp denzo files from $file"
	    nawk '/^sector/{print $2}' ${tempfile}york >> ${tempfile}xfiles
	    
	    # check for denzo vs york format
	    grep "WARNING" ${tempfile}york >& /dev/null
	    if(! $status) then
		# this was not a york file
		set NOT_YORK
	    endif
	
	    set USEFUL	
	endif
	rm -f ${tempfile}york
	
	# may be a sequence file, see if it reads like one
	cat $file |\
	nawk -f ${tempfile}sequencer.awk |\
	cat >! ${tempfile}
#	nawk '/chain/,NF==0' |\
	set temp = `cat ${tempfile} | wc -l`
	if("$temp" > 1) then
	    # store sequences here too
	    if($?FIRSTIME) echo "Getting protein sequence from $arg"
	    echo ""         >> ${tempfile}sequence
	    cat ${tempfile} >> ${tempfile}sequence
	    echo ""         >> ${tempfile}sequence
	    set USEFUL
	endif
	
	# may have something about the metal in it.
	# (non-numeric words other than "all" are run through elements.awk)
	nawk '{print $0 " "}' $file |\
	nawk 'BEGIN{RS=" "} NF == 1' |\
	nawk '$0+0 == 0' |\
	nawk '$0 !~ "all"' |\
	nawk -f ${tempfile}elements.awk |\
	nawk '$1 > 16' |\
	cat >! ${tempfile}
	set temp = `cat ${tempfile} | nawk 'NR==1{print $2, $3}'`
	if($#temp == 2) then
	    set METAL = "$temp[2]"
	    # NOTE(review): elements.awk is executed directly here, while it
	    # is run with "nawk -f" just above -- confirm it is executable
	    set Ee = `echo $METAL | ${tempfile}elements.awk | nawk 'NR==1{print $2}'`

	    # look for number in front of element name (sites)
	    cat $arg |\
	    nawk '! /^ATOM/' |\
	    nawk 'BEGIN{RS=" "} NF == 1' |\
	    nawk 'NF==1' |\
	    nawk -v Ee=$temp[1] -v Name=$temp[2] \
	     '(tolower(Name) == tolower($1) || $1 == Ee || tolower($1) ~ /^site/) && v>0 && v<100{print v} {v=$1+0}' |\
	    cat >! ${tempfile}
	    set temp = `nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}' ${tempfile}`
	    rm -f ${tempfile}
	    if("$temp" != "") set SITES = "$temp"
	    
	    set USEFUL
	endif
	
	if(! $?USEFUL) then
	    echo "WARNING: no useful information in $arg"
	endif

	rm -f ${tempfile} >& /dev/null
    endif

    # keep this variable from getting too long
    # (only the last word survives each pass)
    set understood = `echo $understood | nawk '{print $NF}'`
end

##############################################
#   Finished first command-line pass (files) #
##############################################




GetMTZinfo:
###############################################################################
#
#	Dump MTZ batch info (quickly!)
#
#	Every file listed in ${tempfile}mtzs that is not already present in
#	${tempfile}batches is run through mtzdump.  Files with batch headers
#	contribute one line per batch to ${tempfile}batches; files without
#	batches are checked for merged data (refMTZ) and free-R flags
#	(freeR_source) instead.
#
###############################################################################
# now get batch data from any mtz files
touch ${tempfile}batches
touch ${tempfile}mtzs

# remove duplicate files
# (lines with more than 5 fields are existing batch records whose file name
#  is field 2; lines with fewer than 5 fields are new file names)
cat ${tempfile}batches ${tempfile}mtzs |\
nawk 'NF > 5 {seen[$2] = $2} \
      NF < 5 {if(seen[$1] == "") print; seen[$1] = $1}' |\
cat >! ${tempfile}
mv ${tempfile} ${tempfile}mtzs
set number_of_mtzs = `cat ${tempfile}mtzs | wc -l`
if($number_of_mtzs > 0) then

    echo ""
    echo -n "Organizing mtz data ... "
    if(! $?AUTO) then
	# interactive runs: an interrupt jumps to the cleanup label below
	echo -n "(Cntrl-C to end)"
	onintr SkipRestofMTZs
    endif
    echo ""

    # create temporary mtzdump input
    echo "HEAD" >! ${tempfile}inp
    echo "BATCH" >> ${tempfile}inp
    
    # run by each mtz in our list, collecting batch info
    set i = 0
    while($i < $number_of_mtzs)
	@ i = ( $i + 1 )
	# pick the i-th file in the list
	set mtz = `head -$i ${tempfile}mtzs | tail -1 | nawk '{print $1}'`
	unset USEFUL

	# dump out its batch contents
	# (one line per batch: wave file batch start stop mos hires lores sg cell;
	#  only lines with a positive first field and last field above 10 are kept)
	cat ${tempfile}inp |&\
	mtzdump hklin $mtz |&\
	nawk -v file=$mtz '\
	    /Space group =/{sg = $NF+0}\
	    /  Resolution Range /{getline;getline; hires = $6; lores = $4}\
	    /Orientation data for batch/ && /oscillation data/{batch = $5}\
	    /  Cell dimensions /{cell = $4 " " $5 " " $6 " " $7 " " $8 " " $9}\
	    /  Wavelength and dispersion/{wave = $5}\
	    /  Mosaicity /{mos = $NF}\
	    /  Start & stop Phi angles/{start = $7; stop = $8}\
	    /Detector information/{print wave, file, batch, start, stop, mos, hires, lores, sg, cell}' |&\
	nawk '$1+0 > 0 && $NF+0 > 10' |&\
	cat >&! ${tempfile}
	
	set temp = `nawk '$1+0 > 0.1' ${tempfile} | wc -l`
	if("$temp" > 0) then
	    # replace the space group number (field 9) with field 4 of the
	    # matching symop.lib header line, then keep the batch records
	    cat ${CLIBD}/symop.lib |\
	    nawk 'NF>5{print "SYMM",$1,$4}' |\
	    cat - ${tempfile} |\
	    nawk '/^SYMM/{SG[$2]=$3;next} {$9=SG[$9];print}' |\
	    cat >> ${tempfile}batches
	    echo "$mtz ($temp frames)"
	    continue
	endif

	# no batches in this file; the checks below are skipped once
	# SEARCHED_FOR_MTZS is set
	if(! $?SEARCHED_FOR_MTZS) then
	    # check to see if it could be merged data (a reference)
	    echo "HEAD" | mtzdump hklin $mtz |\
	    nawk '/Number of Reflections/ && $NF==0{exit} \
		   /Column Labels/{getline;getline;while(NF){j=0;for(i=1;i<=NF;++i){++j;label[i]=$j;} getline}}\
		   /Column Types/{getline;getline;while(NF){j=0;for(i=1;i<=NF;++i){++j;print $j, label[i];} getline}}'|\
	    cat >&! ${tempfile}labels
	    
	    # check for reference data (any column of type F or J)
	    set temp = `nawk '$1=="F" || $1=="J"' ${tempfile}labels | wc -l`
	    if($temp > 0) then
		echo "$mtz (merged reference)"
		set USER_REFERENCE
		set refMTZ = "$mtz"
		set refMTZset = ""
		set USEFUL
	    endif
	    # check for free-R flags
            set temp = `nawk 'tolower($NF) ~ /freer/ {print $NF}' ${tempfile}labels`
            if("$temp" != "") then
                set freeR_source = "$mtz"
		set USEFUL
            endif
	    
	    # this mtz must just not be anything useful...
	    if(! $?USEFUL) then
		# warn about explicitly specified bad files (don't complain about ones we found)
		foreach file ( $understood )
		    if("$file" == "$mtz") echo "WARNING: no batches in $mtz"
	        end
	    endif
	    rm -f ${tempfile}labels >& /dev/null
	endif
	
#	@ i = ( $i + 1 )
    end

    goto SkipRestofMTZs

    # avoid onintr nesting bug
    # (the goto above means the unmatched end/endif lines below are never
    #  reached in normal flow; NOTE(review): presumably they let csh unwind
    #  its nesting when an interrupt lands inside the loop -- confirm)
    end
    end
    end
    endif
    endif
    endif

SkipRestofMTZs:
    # restore default interrupt handling
    onintr
    
    # clean up
    rm -f ${tempfile}labels >& /dev/null
    rm -f ${tempfile}mtzs
    rm -f ${tempfile}inp
endif

GetDenzoInfo:
###############################################################################
#
#	Denzo files will be represented in wildcard format for the
#	purposes of identifying "same" source batches:
#	frame_001.x -> frame_###.x, batch 1
#
#	the batch number from the filename will become the last word on the line
#
#	Input:   ${tempfile}xfiles  (one denzo file name per line)
#	Output:  batch records appended to ${tempfile}batches, each ending in
#	         the source file name and the word DENZO; XYrange is set from
#	         the X_range line near the end of the converted data
#
###############################################################################
# now get batch data from any denzo files
if(! -e ${tempfile}xfiles) touch ${tempfile}xfiles
set temp = `nawk 'NF==1' ${tempfile}xfiles | wc -l`
if("$temp" > 0) then
    echo ""
    echo -n "Organizing denzo data .."
    
    set DENZO_FILES
    
    # parse filenames (into denzoish template and batch number)
    # output: template batch filename   (batch is "-" when the name has no
    # trailing digits in front of its extension)
    sort -u ${tempfile}xfiles |\
    nawk 'NF==1{template="";batch=""; ext=""; width=pad=""; \
	n=split($1,a,".");\
	if(n>1){ext = "." a[n]};\
	template = a[1];\
	for(i=2;i<n;++i){template = template "." a[i]};\
	for(i=length(template);substr(template,i,1) ~ /[0-9]$/; --i){++width; pad=pad "#"};\
	batch=substr(template,i+1); template=substr(template,1,i); \
	if(batch != ""){width="%0" width "d"}else{batch="-"}; \
    print template pad ext, batch, $1;}' >! ${tempfile}parsed
    
    # entertainment
    echo -n ".this will take a while"
    
    # get same info from .x files that we would get from MTZs
    # (a FILENAME line is injected after every "sector" line so that the
    #  file, batch and template travel with the data through x2york.awk)
    sort -u ${tempfile}parsed |\
    nawk '{file=$NF; template=$1; batch=$2;\
	while("cat " file | getline){ print ;\
	    if($1 ~ /^sector/){print "FILENAME", file, batch, template}};\
	close("cat " file);}' |\
    nawk -f ${tempfile}x2york.awk |\
    nawk '/[a-z]/' >! ${tempfile}denzoinfo
    
    # entertainment
    echo -n "."
    
    # get the X-Y range for rotaprep (much later)
    set XYrange = `tail -10 ${tempfile}denzoinfo | nawk '/X_range:/{print $2, $3, $5, $6}'`
    
    # now extract batch info from this processed file
    # (one record is printed per "crossfire" line; an oscillation range of
    #  zero is replaced by 10000 before mosaicity is divided by it)
    # sort keys: wavelength (numeric), template, sector (numeric); written
    # with -k because the old "+pos -pos" form is rejected by current sorts
    cat ${tempfile}denzoinfo |\
    nawk '/^sector/{sector = $2+0;};\
	  /^FILENAME/{file = $2; batch=$3; template=$4};\
	  /^oscillatio/{phi0 = $3+0; phiend=$NF+0};\
	  /^wavelength/{wave = $2+0};\
	  /^mosaic/{mosaic = $2+0};\
	  /^resolution/{hires= $NF+0; lores =$3+0};\
	  /^space/{sg = toupper($NF)};\
	  /^unit cell/{cell = substr($0, 10)};\
	  /^crossfire/{ \
	osc=(((phiend-phi0)*1000)%360000)/1000; if(osc==0) osc=10000; \
	print wave, template, sector, phi0, phiend, mosaic/osc, hires, lores, sg, cell, file, "DENZO";\
	phi0=phiend=cell=wave=phi="-";}' |\
    sort -k1,1n -k2,2 -k3,3n >! ${tempfile}denzobatches

# faster, but can't handle cat-ed xfiles or XY range
#    # get same info from .x files that we would get from MTZs
#    sort -u ${tempfile}parsed |\
#    nawk '{file=$NF; template=$1; batch=$2; phi0=phiend=cell=wave=phi="-";\
#	while("tail -50 " file | getline){   ;\
#	    if($1 ~ /^wavelength/){wave = $2+0};\
#	    if($1 ~ /^oscillatio/){phi0 = $3+0; phiend=$NF+0};\
#	    if($1 ~ /^resolution/){hires= $NF+0; lores =$3+0};\
#	    if($1 ~ /^mosaic/){  mosaic = $2+0};\
#	    if($1 ~ /^sector/){  sector = $2+0};\
#	    if($1 ~ /^space/){       sg = toupper($NF)};\
#	    if($0 ~ /^unit cell/){ cell = substr($0, 10)};\
#	}close("tail -50 " file); osc=(((phiend-phi0)*1000)%360000)/1000; if(osc==0) osc=10000; \
#	print wave, template, sector, phi0, phiend, mosaic/osc, hires, lores, sg, cell, file}' |\
#    sort +0n -1  +1 -2  +2n -3 >! ${tempfile}denzobatches


    # entertainment
    echo "."
    
    # add denzo files to master batch list
    cat ${tempfile}denzobatches >> ${tempfile}batches
    
    # no longer need this file
    rm -f ${tempfile}parsed
    rm -f ${tempfile}denzoinfo
    rm -f ${tempfile}denzobatches
endif
rm -f ${tempfile}xfiles >& /dev/null

#########################################
# ${tempfile}batches has format:
# wavelength file# batch# phi_start phi_stop  mosaicity hires lores SG cell
#########################################


##############################################
# gather statistics on all batches
#
# Summarizes ${tempfile}batches into ${tempfile}stats (WAVE, RESO, SYMM,
# MOSAIC_NORM, CELL, maxdCELL) and uses the summary to fill in SG, CELL,
# hiRES/loRES and METAL when they are still unset, and to propose an
# INTENSITIES ... PARTIALS card.  If there are no batches at all, the user
# is asked where the data are (or the script goes looking for them).
##############################################
set temp = `nawk '$1+0 > 0.1' ${tempfile}batches | wc -l`
if($temp > 0) then
    echo ""
    echo "Hang on..."
    
    # SG, wave, etc
    # WAVE        = most common wavelength
    # RESO        = extreme resolution limits over all batches
    # SYMM        = most common space group, followed by any others seen
    # MOSAIC_NORM = mean of mosaicity/(phi_stop - phi_start)
    # (batches with phi_start == phi_stop are left out of the sum, so they
    #  cannot cause a division by zero)
    cat ${tempfile}batches |\
    nawk 'BEGIN{hires = 10} \
    {++n; ++wavecount[$1]; ++SGcount[$9]; if($6+0>0 && ($5-$4) != 0){mos += $6/($5-$4)}; \
    if(hires > $7){hires=$7+0}; if(lores < $8){lores=$8+0};} \
    END{if(n) mos /= n;\
    for(w in wavecount) {if(wavecount[w] > wavecount[wave]){wave = w}} \
    for(s in SGcount) {if(SGcount[s] > SGcount[SG]){SG = s} } \
    for(s in SGcount) {if(s != SG){++extraSG[s]} } \
    for(s in extraSG) {SG = SG "  " s}  \
    print "WAVE", wave; \
    print "RESO", hires, lores; \
    print "SYMM", SG;   \
    print "MOSAIC_NORM", mos;}' |\
    cat >! ${tempfile}stats

    # analyze cell too
    # CELL     = mean of each cell parameter
    # maxdCELL = 1000 x the largest fractional deviation of any parameter
    #            of any batch from its mean
    cat ${tempfile}batches |\
    nawk '{++n;\
    a[n]=$10;b[n]=$11;c[n]=$12;A[n]=$13;B[n]=$14;G[n]=$15; \
    asum+=$10;bsum+=$11;csum+=$12;Asum+=$13;Bsum+=$14;Gsum+=$15;} \
    END{if(n==0) exit;\
    asum/=n;bsum/=n;csum/=n;Asum/=n;Bsum/=n;Gsum/=n; \
    for(x in a) {\
      d=sqrt((a[x]-asum)*(a[x]-asum))/asum; if(d > dCELL) {dCELL = d};\
      d=sqrt((b[x]-bsum)*(b[x]-bsum))/bsum; if(d > dCELL) {dCELL = d};\
      d=sqrt((c[x]-csum)*(c[x]-csum))/csum; if(d > dCELL) {dCELL = d};\
      d=sqrt((A[x]-Asum)*(A[x]-Asum))/Asum; if(d > dCELL) {dCELL = d};\
      d=sqrt((B[x]-Bsum)*(B[x]-Bsum))/Bsum; if(d > dCELL) {dCELL = d};\
      d=sqrt((G[x]-Gsum)*(G[x]-Gsum))/Gsum; if(d > dCELL) {dCELL = d};} \
    print "CELL", asum, bsum, csum, Asum, Bsum, Gsum;\
    printf "maxdCELL %f\n", dCELL*1000;}' |\
    cat >> ${tempfile}stats
    
    # use mtz values to set uninitialized variables
    set temp = `nawk '/^SYMM/{print substr($0,5)}' ${tempfile}stats`
    if("$#temp" > 1) then
	echo "WARNING: $temp found for space group! "
	echo "         $temp[1] will be used."
	set temp = "$temp[1]"
    endif
    if($#temp == 1) then
	# update SG, unless already set
	if("$SG" == "unknown") then
	    set SG = "$temp"
	endif
    else
	echo "ERROR! no space groups found in input files! "
	# this should never happen
	set SG = "P1"
#	goto Cleanup
    endif

    set temp = `nawk '/^CELL/{print substr($0,5)}' ${tempfile}stats`
    if($#temp == 6) then
	# define cell,  unless already set
	if($#CELL != 6) set CELL = `echo "$temp"`
    else
	echo "WARNING: no unit cell in input files ! "
	echo "         What the hell is going on! "
	set CELL = `echo 0 0 0 90 90 90`
    endif

    # complain (first time through only) when the integer part of maxdCELL
    # is not zero, and list up to 11 of the files whose cell differs
    set temp = `nawk '/^maxdCELL/{printf "%d", $NF}' ${tempfile}stats`
    if(("$temp" != "0")&&($?FIRSTIME)) then
	echo "WARNING: Unit cells differ in input files ! "
	echo "Average Cell: $CELL"
	echo "CELL $CELL" >! ${tempfile}cell
	
	cat ${tempfile}cell ${tempfile}batches |\
	nawk '/^CELL/{a=$2+0; b=$3+0; c=$4+0; A=$5+0; B=$6+0; G=$7+0; D=$NF} \
	/^[0-9]/{if((($10-a)^2+($11-b)^2+($12-c)^2+($13-A)^2+($14-B)^2+($15-G)^2) > 0){\
	print $2 ": cell = ",  $10, $11, $12, $13, $14, $15} }' |\
	nawk '{CELL[$1]=$0} END{for(mtz in CELL){print CELL[mtz]}}' |\
	sort | nawk '{print} NR==11{print"etc..."; exit}'
	rm -f ${tempfile}cell >& /dev/null
	
	echo "         How are we supposed to know which one to use? "
	echo "         Unless you think your unit cell is really changing, "
	echo "         you should use the same cell for all your frames! "
	
	set NEED_TO_POSTREFINE
    endif

    # set up resolution range
    set temp = `nawk '/^RESO/ && $NF+0 > 0.1{print}' ${tempfile}stats`
    if(("$#temp" == "3")&&("$hiRES" == "")) then
	set hiRES = "$temp[2]"
	set loRES = "$temp[3]"
    endif

    # pick most likely metal, based on wavelength data were collected at
    if("$METAL" == "") then
	nawk '/^WAVE/{print $NF}' ${tempfile}stats |\
	nawk -f ${tempfile}elements.awk  >! ${tempfile}
	set temp = `nawk 'NR == 1{print $3}' ${tempfile}`
	if("$temp" != "") then
	    # most likely MAD metal
	    set METAL = "$temp"
	    # NOTE(review): elements.awk is executed directly here, while it
	    # is run with "nawk -f" just above -- confirm it is executable
	    set Ee = `echo $METAL | ${tempfile}elements.awk | nawk 'NR==1{print $2}'`
	endif
    endif
    
    # evaluate mean normalized mosaicity (mosaicity/osc)
    # and turn it into at most one suggested INTENSITIES card
    set MOSAIC = `nawk '/^MOSAIC/{print $NF}' ${tempfile}stats`
    echo $MOSAIC |\
    nawk '$NF > 0.34{print $NF, 0.577 - 0.226*log($NF-0.333)}' |\
    nawk '$NF < 0.99{printf "INTENSITIES SCALE_PARTIALS %.2f\n", $NF};\
    $NF < 0.7 {printf "INTENSITIES PARTIALS MAXWIDTH %d\n", $1+1.5;}' |\
    nawk '/MAXWIDTH/{if($NF>6){print "INTENSITIES PARTIALS"}else{print}}  ! /MAXWIDTH/' |\
    tail -1 >> ${tempfile}parts
    set temp = `cat ${tempfile}parts | wc -l`
    
    # create SCALA card for what to do with partials
    egrep "^INTEN" $RULESFILE | grep "PART" >& /dev/null
    if(($status)&&("$temp" > 0)) then
	# no INTENS PART card yet, and Elves have picked one
	if($?FIRSTIME) then
	    # add our card to the list
	    cat ${tempfile}parts >> $RULESFILE
	else
	    # ask this time
	    cat << EOF

Your crystal mosaicity is high compared to the oscillation angle: ${MOSAIC}:1  
This means you will have few fully-recorded spots in your data set, and the 
(default) fulls-only scaling procedure in scala might be unstable.
Therefore, the elves recommend using partials in scaling, specifically:
EOF
cat ${tempfile}parts
	    set temp = "Yes"
	    echo "Do this? [$temp]"
	    echo -n "$PROMPT"
	    echo -n "$BELL"
	    if($?AUTO) then
		echo "$temp"
	    else
		set in = "$<"
		if("$in" != "") set temp = "$in"
	    endif
	    if("$temp" !~ [Nn]*) then
		# add the card that was just shown to the user
		cat ${tempfile}parts >> $RULESFILE
	    endif
	endif
    endif
    
    rm -f ${tempfile}parts
    
    # still merge denzo-derived partials
    if($?NOT_YORK) then
	# see if card is already there
	grep -i "NOTEST" $RULESFILE >& /dev/null
	if($status) then
	    # don't defeat user's will, just bother them
	    if($?FIRSTIME) then
		echo "PARTIALS NOTEST" >> $RULESFILE
	    else
		# explain ourselves?
		cat << EOF
WARNING: some denzo input files are not in york format, and, therefore have
         no record of the fraction-full of your partials.  You are strongly
	 advised to use "PARTIALS NOTEST" to keep scala from throwing out
	 all your partials.
EOF
	    endif
	endif

	set NEED_TO_POSTREFINE
    endif
else
    # no batches found anywhere
    if(! $?SEARCHED_FOR_MTZS) then
	# offer */*/raw.mtz as the default answer if such files exist
	set temp = "I don't know"
	ls */*/raw.mtz >& /dev/null
	if(! $status) set temp = '*/*/raw.mtz'
	echo "Where are the data you want to scale? [$temp]"
	echo -n "$PROMPT"
	echo -n "$BELL"
	if($?AUTO) then
	    echo "$temp"
	else
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	endif
	if("$temp" == "I don't know") goto MTZsearch
	if("$temp" != "") then
	    # treat the answer as new input and start over
	    set input = "$temp"
	    goto Gather
	endif
	
	goto MTZsearch
    endif
    
    # no mtzs, and can't find any
    echo "No mtz files!  Where are your data? "
    
    goto Problem
endif



##############################################
# now parse out runs, 
# and invent batch adding strategy
#
# The batch list is sorted by wavelength (numeric), file and phi_start
# (numeric) and handed to parser.awk.  The part of its output starting at
# the first "wavelength" line becomes $RUNFILE (unless the user supplied
# one); the part starting at "dump" becomes ${tempfile}strategy.
##############################################
# sort keys are written with -k; the old "+pos -pos" form is rejected by
# current sort implementations
sort -k1,1n -k2,2 -k4,4n ${tempfile}batches |\
nawk -f ${tempfile}parser.awk |\
cat >! ${tempfile}parsed

# use top part as user-viewable runfile
if($?USER_RUNFILE) then
    # the user handed us an edited run file: take labels and runs from it
    set wavenames   = `nawk '$2=="wavelength"{print $3}' $USER_RUNFILE`
    set wavelengths = `nawk '$2=="wavelength"{print $5}' $USER_RUNFILE`
    # copy via a scratch file, since $USER_RUNFILE may be $RUNFILE itself
    cat $USER_RUNFILE |\
    nawk '$2 == "wavelength", /\n/' |\
    cat >! ${tempfile}
    cat ${tempfile} >! $RUNFILE
    rm -f ${tempfile} >& /dev/null
else
    cat ${tempfile}parsed |\
    nawk '$2=="wavelength",NF==0' |\
    cat >! $RUNFILE
endif

# use bottom part to keep track of detailed batch adding strategy
cat ${tempfile}parsed |\
nawk '/dump/,NF==0' |\
sort -n >! ${tempfile}strategy

# check for overlap with "reference" batch
# (ref_batch is bumped until it is not field 3 of any strategy line)
cat ${tempfile}strategy |\
nawk -v ref_batch=$ref_batch '{taken[$3]=1} END{while(taken[ref_batch]) ++ref_batch;\
   print ref_batch}' |\
cat >! ${tempfile}ref_batch
set ref_batch = `cat ${tempfile}ref_batch`
rm -f ${tempfile}ref_batch >& /dev/null

# keep batch info around for later (might want to add/delete mtzs)
#rm -f ${tempfile}batches
rm -f ${tempfile}parsed




##############################################
# check for $%&#@ing scala bug
#
# Looks for the string "lwidas(1,nlprgo" in the scala source under $CPROG.
# If it is there (and this is the first pass), the user is told how to patch
# scala and asked whether that has been done; any answer not starting with
# Y or y sets ROUGHSCALE_ONLY.
##############################################
if(! $?CPROG) then
    # no CCP4 source directory defined: look in the current directory and
    # remember that we are guessing
    set CPROG = .
    set NO_SRC
endif
grep "lwidas(1,nlprgo" ${CPROG}/scala_/scala.f >& /dev/null
if((! $status)&&($?FIRSTIME)) then
    # crap...
    # default answer to the question at the end of the message
    set temp = "No"
    cat << EOF-message

Bad scala! 

Your version of scala appears to contain a known bug that
existed from CCP4 v 3.5.1 to 4.0.1.  This bug prevents Scaler
Elves from passing scales from one script to the next.  This
means that the localscaling procedure provided here will crash 
just before creating an output mtz file. ;(  There is no 
workaround, other than to forgo localscaling.

To fix your scala program you must edit:
${CPROG}/scala_/scala.f

delete line 9805:
           nlprgo = ncolou
	   
and change line 9854:
          call lwidas(1,nlprgo,pname_out,dname_out,0)
to this:
          call lwidas(1,ncolou,pname_out,dname_out,0)
	  
and then recompile scala:
cd ${CPROG}/scala_
make
mv scala ${CBIN}

Until the above changes have been made, Scaler Elves will
not try to do localscaling for you.  Unfortunately, you 
probably have to be root to do this.

Has these changes already been made? [$temp]
EOF-message
    echo -n "$PROMPT"
    echo -n "$BELL"
    if($?AUTO) then
	# non-interactive: take the default
	echo "$temp"
    else
	set in = "$<"
	if("$in" != "") set temp = "$in"    
    endif
    if("$temp" !~ [Yy]*) then
	echo "disabling localscaling for this run.... sorry."
	set ROUGHSCALE_ONLY
    endif
endif




##############################################
# check for too many batches 
##############################################
# count the "run" lines in the run file and the lines in the batches file
set runs = `nawk '/^run/' $RUNFILE | wc -l`
set batches = `cat ${tempfile}batches | wc -l`
# use one more than the number of lines in the batches file
@ batches = ( $batches + 1 )
# only checked while FIRSTIME is set, and only above 1000 batches or 40 runs
if(($?FIRSTIME)&&(($batches > 1000)||($runs > 40))) then
    set MBATCH
    set MAXRUNS
    # check to see if CCP4 might have been recompiled to handle this
    # (largest number found after an "=" that follows a field containing MBATCH)
    set MBATCH = `grep -i mbatch ${CPROG}/*.f |& nawk 'BEGIN{FS="="} {for(i=1;i<=NF;++i)if(toupper($i) ~ "MBATCH") print $(i+1)+0}' |& sort -n |& tail -1`
    # assume default value if we can't tell
    if("$MBATCH" !~ [1-9]*) then
	set MBATCH = 1000
	set NO_SRC
    endif
        
    # same search for MAXRUN in the scala parameter file
    set MAXRUNS = `grep -i maxrun ${CPROG}/scala_/parameter.fh |& nawk 'BEGIN{FS="="} {for(i=1;i<=NF;++i)if(toupper($i) ~ "MAXRUN") print $(i+1)+0}' |& sort -n |& tail -1`
    # assume default value if we can't tell
    if("$MAXRUNS" !~ [1-9]*) set MAXRUNS = 40;
       
    # soften the wording when the limits could not be read from source
    set are = "are"
    if($?NO_SRC) set are = "may be"
 
    if($MAXRUNS < $runs) then
	# too many runs.
	echo "There $are too many runs for your copy of scala."
	echo ""
	echo "Either throw out some small wedges or edit:"
	echo "${CPROG}/scala_/parameter.fh"
	echo "so that maxruns=$runs (or more)"
	echo ""
    endif
    
    if($MBATCH < $batches) then
	# too many batches; maybe we should recompile CCP4 with a bigger MBATCH?
	echo ""
	echo "There $are too many frames! "
	# number of batches over the limit
	set temp = `echo "$batches - $MBATCH" | bc`
	cat << EOF

As far as we can tell, your installation of CCP4 can't handle
more than $MBATCH batches at once, and you have ${batches} batches.

You have three choices:
1)  throw out $temp frames from this analysis.
2)  scale each wavelength separately, and combine them after mergeing 
    (that is, do several Scaler runs.)
3)  recompile CCP4 with the maximum allowed batches increased

Doing #3 seems like a real pain, (and it is).  However, it is still
the best option, and here's how you do it:
    
    for both ${CPROG} and ${CLIBS}
    - use 
      grep -i MBATCH \`find ${CPROG} -type f -print\` | grep 1000
      to find files containing MBATCH=1000.  You need to edit these
      files so MBATCH=$batches (at least).  We use MBATCH=10000.
    - also, in: 
      ${CPROG}/scala_/parameter.fh
      you need to change maxbat=1000 and maxmat=1000 and, perhaps 
      maxpmr=2000 to values > $batches as well.  maxrun should be > $runs
      too.
    
    after that, you should be able to rebuild CCP4 with:
    cd ${CCP4}
    make
    make install
    
    but, you have to have write permissions to CCP4 to do this.

EOF
    endif
    if(($MAXRUNS < $runs)||($MBATCH < $batches)) then
	
	# final warning
	echo ""
	echo "If, however you think your copies of scala and sortmtz can handle $batches"
	echo "frames and $runs runs, answer "none" to the question below."
	echo ""
    
	# don't do this twice
	unset FIRSTIME
	
	# pick shortest runs to throw out:
	# build a " remove <files>" request in the input file
	echo -n " remove " >! ${tempfile}input
	
	# name smallest mtzs that add up to the "batch excess"
	# (files sorted largest first; every file after the running total
	#  passes MBATCH is listed)
	nawk '$3 == "+"{print $6-$4+1, $NF}' $RUNFILE | sort -nr |\
	 nawk -v limit=$MBATCH '{batches+=$1} batches > limit{print $NF}' |\
	 cat >! ${tempfile}badfiles
	# find smallest N-MAXRUN runs.
	# NOTE(review): "NR >= limit" also lists the limit-th run, so one run
	# fewer than MAXRUNS is kept, and "print $NF" names only the last file
	# collected for a run -- confirm both are intended.
	nawk '$3 == "+"{runcount += $6-$4+1; runfile = runfile " " $NF} \
	      /^run/ {print runcount, runfile; runcount = 0; runfile = ""}' $RUNFILE | sort -nr |\
	 nawk -v limit=$MAXRUNS 'NR >= limit{print $NF}' |\
	 cat >> ${tempfile}badfiles
	
	# uniqueify, and send to mtz eliminator
	sort -u ${tempfile}badfiles | nawk '{printf " %s ", $1}' >> ${tempfile}input
	rm -f ${tempfile}badfiles
	
	# don't override the ${tempfile}input file
	set input = ""
	echo " " >> ${tempfile}input
	
	# limits were only assumed, not read from source: suggest removing nothing
	if($?NO_SRC) then
	    echo "remove none" >! ${tempfile}input
	endif
	
	goto RemoveStuff
    endif

endif





##############################################
#   Word-wise command-line pass              #
##############################################

# Scan the command-line text in ${tempfile}input for stand-alone keywords
# (blank-delimited) and switch on the matching option flags.

# "auto": go automatic
grep " auto " ${tempfile}input >& /dev/null
if($status == 0) set AUTO

# "debug": dump all shell variables (grep output is left visible here)
grep " debug " ${tempfile}input
if($status == 0) set

# "hurry": select fast and reckless processing
grep " hurry " ${tempfile}input >& /dev/null
if($status == 0) set HURRY_UP

# "keep": bail instead of rejecting frames
grep " keep " ${tempfile}input >& /dev/null
if($status == 0) then
    set KEEP_ALL_FRAMES
    set FINAL_JUMPS
endif

# "smooth": don't bother trying to find subwedges?
grep " smooth " ${tempfile}input >& /dev/null
if($status == 0) then
    set USER_SMOOTH
    set SCALING = smooth
    set BFACTOR = smooth
    #set INITIAL_JUMPS
endif

# select no reformatting of run list? 


###############################################################################
# check for protein sequences on command line
# (sequencer.awk output, including its stderr, is collected in ${tempfile}seq)
cat ${tempfile}input | nawk -f ${tempfile}sequencer.awk >&! ${tempfile}seq

set temp = `cat ${tempfile}seq | wc -l`
# a new sequence is accepted only when something was produced AND the
# input does not contain "remove" or "delete" (egrep exits non-zero)
egrep "remove|delete" ${tempfile}input >& /dev/null
if(($status)&&($temp != 0)) then

    # remove old sequences from ${tempfile}sequence
    mv ${tempfile}seq ${tempfile}sequence
    # redefine mass
    set MASS = "unknown"
    
    set understood = "$understood new sequence"
endif
rm -f ${tempfile}seq >& /dev/null

# get mass from sequence:
# sum the first field of every line containing "chain:" and accept the
# total only when it exceeds 1000
if("$MASS" == "unknown") then
    set temp = `nawk '/chain:/{mass += $1} END{if(mass+0 > 1000) printf "%.1f", mass+0}' ${tempfile}sequence`
    if("$temp" != "") set MASS = "$temp"
endif


###############################################################################
# check for metal name/symbol (watch out for single-letter elements, user should say "iodine" or "uranium")
# The input is split into one word per record; words containing digits and
# single-character words are dropped before elements.awk sees them.
cat ${tempfile}input |\
nawk 'BEGIN{RS=" "} NF==1' |\
nawk '! /[0-9]/ && length($1) != 1' |\
nawk -f ${tempfile}elements.awk >&! ${tempfile}
# third field of the first output line is taken as the metal name
set temp = `cat ${tempfile} | nawk 'NR==1{print $3}'`
if("$temp" != "") then
    # must have given an element name! 
    set METAL = "$temp"
    # second field of the first output line goes into Ee
    # NOTE(review): elements.awk is executed directly here, while the other
    # uses run it through "nawk -f"; this presumably relies on the file being
    # executable with a suitable first line -- confirm.
    set Ee = `echo $METAL | ${tempfile}elements.awk | nawk 'NR==1{print $2}'`

    set understood = "$understood $METAL"
endif


###############################################################################

# Run the accumulated Elvish file together with the raw command-line text
# through ginger.awk (English -> Elvish) and make the result the new
# ${tempfile}elvish.  The touch guarantees the file exists on first use.
touch ${tempfile}elvish
cat ${tempfile}elvish ${tempfile}input | nawk -f ${tempfile}ginger.awk >! ${tempfile}
mv ${tempfile} ${tempfile}elvish


# From here on: walk through the x-ray parameters relevant to Scaler,
# updating ONLY those that were given an explicit value.

# space group
grep "SYMM" ${tempfile}elvish >& /dev/null
if($status == 0) then
    set temp = `nawk '/^SYMM/{print $NF}' ${tempfile}elvish`
    # validate the word against the SG library when it is available
    # (ginger can't really do this)
    if( -e $CLIBD/symop.lib) then
	set temp = `nawk -v SG=$temp '$4 == toupper(SG) {print $4}' $CLIBD/symop.lib | head -1`
    endif
    # accept only something shaped like a space-group symbol
    if("$temp" =~ [PpCcIiFfRrHh][1-6]*) set SG = "$temp"
endif


# unit cell
# Take the CELL line produced by ginger, complete/correct it using the
# crystal system of the current space group, and accept it only when all
# six parameters come out larger than 5.
grep "CELL" ${tempfile}elvish >& /dev/null
if(! $status) then
    # check against SG: the crystal system is column 6 of the matching
    # symop.lib line (same lookup as the other cell checks in this script;
    # the last column may be part of a quoted long space-group name)
    set temp = `nawk -v SG="$SG" '$4 == toupper(SG) {print $6}' $CLIBD/symop.lib |& head -1`
    nawk -v latt="$temp" '/^CELL/{print substr($0,5), latt}' ${tempfile}elvish |\
    nawk '{a=$1+0; b=$2+0; c=$3+0; A=$4+0; B=$5+0; G=$6+0}\
    $NF == "MONOCLINIC" { A=90; ; G=90; if($4+0 > 5) B=$4+0; if($5+0 > 5) B=$5+0}\
    $NF == "ORTHORHOMBIC" {A=90; B=90; G=90}\
    $NF == "TETRAGONAL" || $NF == "TRIGONAL" || $NF == "HEXAGONAL" {\
    b=a; A=90; B=90; G=120;\
    if((c==0) && (($2-a)^2 > .0001)) {c = $2+0}}\
    $NF == "TETRAGONAL" {G=90}\
    $NF == "CUBIC" {b=a; c=a; A=90; B=90; G=90}\
    END{if(a>5 && b>5 && c>5 && A>5 && B>5 &&G>5) print a, b, c, A, B, G}' |\
    cat >! ${tempfile}
    set temp = `cat ${tempfile}`
    if("$#temp" == 6) then
	# actual unit cell was given
	set CELL = `echo "$temp"`
	set understood = "$understood $temp"
    else
	# cell is bad, ask about it later:
	# leave a bare CELL keyword in the Elvish file so the questionnaire asks
	grep -v "CELL" ${tempfile}elvish >! ${tempfile}
	echo "CELL" >> ${tempfile}
	mv ${tempfile} ${tempfile}elvish
    endif
endif


# resolution limits (high: RESO line, low: loRESO line)
grep "RESO" ${tempfile}elvish >& /dev/null
if($status == 0) then
    # high-resolution limit; only taken when it starts with a digit
    set temp = `nawk '/^RESO/{print $NF}' ${tempfile}elvish`
    if("$temp" =~ [0-9]*) then
	set hiRES = "$temp"
	set USER_hiRES = "$temp"
	set understood = "$understood $temp"
    endif
    # low-resolution limit; same acceptance rule
    set temp = `nawk '/^loRESO/{print $NF}' ${tempfile}elvish`
    if("$temp" =~ [0-9]*) then
	set loRES = "$temp"
	set understood = "$understood $temp"
    endif
endif


# Matthews number/solvent content
# An explicit Vm invalidates the current chain count: SITES is scaled back
# to a per-chain value and CHAINS is cleared so it gets re-derived later.
grep "VM " ${tempfile}elvish >& /dev/null
if(! $status) then
    # slower reader
    set temp = `nawk '/^VM/{print $NF}' ${tempfile}elvish`
    if("$temp" =~ [0-9]*) then
	# explicit value was given
	set Vm = "$temp"
	# forget the chain count, recalculate it later
	if("$CHAINS" != "") then
	    if("$SITES" != "") then
		# reset number of sites to monomer count
		set temp = `echo "$SITES $CHAINS" | nawk '$2+0 > 0 {printf "%d", $1/$2}'`
		if("$temp" > 0) set SITES = $temp
	    endif
	    # reset mass if Vm was explicitly specified
	    if("$CHAINS" == 1) set MASS = "unknown"
	    # redetermine chain count
	    set CHAINS = ""
	endif
	# NOTE(review): temp may have been overwritten by the per-chain site
	# count above, so this is not necessarily the Vm value
	set understood = "$understood $temp"
    endif
endif


# protein mass: adopt the MASS value from the Elvish file when it is numeric
grep "MASS" ${tempfile}elvish >& /dev/null
if($status == 0) then
    set temp = `nawk '/^MASS/{print $NF}' ${tempfile}elvish`
    if("$temp" =~ [0-9]*) then
	# an actual mass value was given
	set MASS = "$temp"
	set understood = "$understood $MASS"
    endif
endif


# ASU size/chain number
# An explicit chain count replaces CHAINS; an existing SITES total is
# rescaled from the old chain count to the new one.
grep "ASU" ${tempfile}elvish >& /dev/null
if(! $status) then
    # slower reader
    set temp = `nawk '/^ASU/{print $NF}' ${tempfile}elvish`
    if("$temp" =~ [0-9]*) then
	# most likely need to update metal sites too
	# (sites per old chain, times the new number of chains)
	if(("$CHAINS" > 1)&&("$SITES" != "")) then
	    set SITES = `echo "$SITES $CHAINS $temp" | nawk '$2 > 0{printf "%d", ($1/$2) * $3}'`
	endif
	set CHAINS = "$temp"
	set understood = "$understood $temp"
    endif
endif


# metal sites: adopt the SITES value from the Elvish file when it is numeric
grep "SITES" ${tempfile}elvish >& /dev/null
if($status == 0) then
    set temp = `nawk '/^SITES/{print $NF}' ${tempfile}elvish`
    if("$temp" =~ [0-9]*) then
	# explicit value given
	set SITES = "$temp"
	set understood = "$understood $SITES"
    endif
endif


# wavelength names? 


Guess:
###############################################################################

  ####   #    #  ######   ####    ####
 #    #  #    #  #       #       #
 #       #    #  #####    ####    ####
 #  ###  #    #  #            #       #
 #    #  #    #  #       #    #  #    #
  ####    ####   ######   ####    ####

###############################################################################
#
#   Guess/calculate at values not provided by user
#
###############################################################################


##############################################
#   Decide on names for wavelengths
##############################################
# must check every time for new wavelengths
# For every file line ($3 == "+") of the run file, print the file name with
# the characters _ . - / turned into blanks, followed by 12398.4245/wavelength
# (presumably the photon energy in eV for a wavelength in Angstrom) and the
# wavelength itself; a zero or missing wavelength is replaced by 1.5470.
# labler.awk turns these lines into ${tempfile}labels, whose last field is
# used as the wavelength name.
cat $RUNFILE |\
nawk '$2=="wavelength"{++wave; wavelen = $5; if(wavelen+0 == 0){wavelen=1.5470}}\
 $3=="+"{for(i=1;i<=length($NF);++i){c=substr($NF,i,1);if(c ~ /[_.\-\/]/){c=" "};printf "%s", c;};\
          printf " %.0f %s \n", 12398.4245/wavelen, wavelen}' | nawk -f ${tempfile}labler.awk |\
cat >! ${tempfile}labels
set RUNFILE_waves = `nawk '{print $NF}' ${tempfile}labels`

# default to FP for single-wavelength data
# (the last field of every label line is overwritten with FP)
if($#RUNFILE_waves == 1) then
    cat ${tempfile}labels |\
    nawk '{$NF = "FP"; print}' |\
    cat >! ${tempfile}
    mv ${tempfile} ${tempfile}labels
endif

# re-split wavenames into a word list so it can be counted
set wavenames = ( $wavenames )
# a different number of names than labels means the wavelength set changed
if("$#wavenames" != "$#RUNFILE_waves") then

    # we need to update the wavelength names
    # (pair the i-th wavelength with the i-th existing name, one pair per line)
    echo "$wavelengths $wavenames" |\
    nawk '{for(i=1;i<=NF/2;++i){print $i, $(i+(NF/2))}}' |\
    cat >! ${tempfile}oldlabels
    
    # filter out NEW labels:
    # new label lines (more than two fields) always define a label for their
    # first field; old two-field pairs only replace a label that already exists
    cat ${tempfile}labels ${tempfile}oldlabels |\
    nawk 'NF>2 || label[$1] != ""{label[$1] = $NF}\
	  END {for(wave in label) if(label[wave] != "") print wave, label[wave]}' |\
    sort -n >! ${tempfile}
    mv ${tempfile} ${tempfile}labels
    rm -f ${tempfile}oldlabels
    
    # labels file is now "key name" per line, sorted numerically
    set wavenames   = `nawk '{print $NF}' ${tempfile}labels`
    set wavelengths = `nawk '{print $1}' ${tempfile}labels`
endif 

# update reference wavelength (in case it has been removed)
# count the label lines whose name (last field) equals the current reference
nawk -v ref="$wave_reference" '$NF == ref' ${tempfile}labels >! ${tempfile}
set temp = `cat ${tempfile} | wc -l`
rm -f ${tempfile} >& /dev/null
# the special reference "alldata" is never replaced
if(("$temp" == 0)&&("$wave_reference" != "alldata")) then
    # no reference?
    
    # (re)generate labels file
    # (here as "name wavelength" pairs, the order the lookup below expects)
    echo "$wavenames $wavelengths" |\
    nawk '{for(i=1;i<=NF/2;++i){print $i, $(i+(NF/2))}}' |\
    cat >! ${tempfile}labels

    # pick new reference wavelength with the most frames
    # (frames per wavelength = sum of 1+$6-$4 over its file lines)
    cat $RUNFILE |\
    nawk '$2=="wavelength"{lambda=$5} $3=="+"{count[lambda]+=1+$6-$4} \
    END{for(lambda in count)print count[lambda], lambda}' |\
    sort -n | tail -1 | \
    nawk '{print $NF}' >! ${tempfile}newref

    # now retrieve the label for this wavelength
    # (two-field lines fill the table, the one-field line is the query)
    cat ${tempfile}labels ${tempfile}newref |\
    nawk 'NF == 2{label[$NF+0] = $1} NF==1{print $1, label[$1+0]}' |\
    cat >! ${tempfile}refname
    set wave_reference = `nawk '{print $NF}' ${tempfile}refname`

    rm -f ${tempfile}refname >& /dev/null
    rm -f ${tempfile}newref >& /dev/null

    # should always find something, but...
    # fall back to the first wavelength name
    if("$wave_reference" == "") set wave_reference = `echo "$wavenames" | nawk '{print $1}'`

endif

rm -f ${tempfile}labels









# second column of the symop.lib line matching the space group is used as
# the number of asymmetric units per cell; default to 1 when nothing matches
set ASU_per_CELL = `nawk -v SG=$SG '$4 == toupper(SG) {print $2}' $CLIBD/symop.lib |& head -1`
if("$ASU_per_CELL" == "") set ASU_per_CELL = 1


# make unit cell consistent with space group (redundant here)
# check against SG
# (column 6 of the symop.lib line is the crystal system name; it is appended
#  to the cell so the awk rules below can key on the last field)
set temp = `nawk -v SG="$SG" '$4 == toupper(SG) {print $6}' $CLIBD/symop.lib |& head -1`
echo "$CELL $temp" |\
nawk '{a=$1+0; b=$2+0; c=$3+0; A=$4+0; B=$5+0; G=$6+0}\
    $NF == "MONOCLINIC" { A=90; ; G=90; if($4+0 > 5) B=$4+0; if($5+0 > 5) B=$5+0}\
    $NF == "ORTHORHOMBIC" {A=90; B=90; G=90}\
    $NF == "TETRAGONAL" || $NF == "TRIGONAL" || $NF == "HEXAGONAL" {\
    b=a; A=90; B=90; G=120;\
    if((c==0) && (($2-a)^2 > .0001)) {c = $2+0}}\
    $NF == "TETRAGONAL" {G=90}\
    $NF == "CUBIC" {b=a; c=a; A=90; B=90; G=90}\
    END{if(a>5 && b>5 && c>5 && A>5 && B>5 &&G>5) print a, b, c, A, B, G}' |\
cat >! ${tempfile}
set temp = `cat ${tempfile}`
rm -f ${tempfile}
# the corrected cell is only adopted when all six values passed the > 5 test
if("$#temp" == 6) then
    set CELL = `echo $temp`
endif

# unit cell volume from the six cell parameters:
# V = a*b*c*sqrt(|1 + 2 cosA cosB cosG - cosA^2 - cosB^2 - cosG^2|)
# (nothing is printed unless CELL has exactly six words)
echo $CELL |\
nawk 'NF==6{s=3.1415926535897899419/180; A=cos(s*$4); B=cos(s*$5); G=cos(s*$6); \
 skew = 1 + 2*A*B*G - A*A - B*B - G*G ; if(skew < 0) skew = -skew;\
 printf "%.3f\n", $1*$2*$3*sqrt(skew)}' >! ${tempfile}volume
set CELLvolume = `cat ${tempfile}volume`
rm -f ${tempfile}volume >> /dev/null


# guess at Vm, if we don't know
if("$Vm" == "") set Vm = 2.4

# compute ASU mass consistent with this Vm
# (cell volume / asymmetric units per cell / Vm; 30000 when that fails)
set ASU = `echo "$CELLvolume $ASU_per_CELL $Vm" | nawk '$2+0>0 && $3+0>0{print ($1/$2) / $3}'`
if("$ASU" == "") set ASU = "30000"
# residue estimate: ASU mass divided by 120
set NRES = `echo "$ASU" | nawk '{printf "%d", ($1/120)}'`

if("$MASS" == "unknown") then
    # set mass to be consistent with Vm
    set MASS = "$ASU"
    set CHAINS = 1
endif

if("$CHAINS" == "") then
    # we have a mass, but don't know how many chains
    
    # decide to round up or down
    # (temp is 1 when the remainder of ASU/MASS is at least half a MASS)
    set temp = `echo "$ASU $MASS" | nawk '($1 % $2) >= $2/2{print 1}'`

    # compute chain count most consistent with Vm
    set CHAINS = `echo "$ASU $MASS $temp" | nawk '{printf "%d", ($1/$2)+$3}'`
    
    # probably want to reset number of sites too
    # (SITES is treated as per-chain here and multiplied by the new count)
    if("$SITES" != "") then
	set SITES = `echo "$SITES $CHAINS" | nawk '{printf "%d", $1 * $2}'`
    endif
endif

# recalculate Vm from protein mass
# (ASU mass = chains * mass per chain; residues = ASU mass / 120)
set ASU = `echo "$CHAINS $MASS" | nawk '{print $1 * $2}'`
set NRES = `echo "$ASU" | nawk '{printf "%d", ($1/120)}'`
# run matthews_coef with cell, space group and ASU mass; stdout and stderr
# both go to the scratch file
matthews_coef << end_mat >&! $tempfile
CELL $CELL
SYMM $SG
molweight $ASU
END
end_mat
# last word of the last matching line of each kind
set Vm = `grep "Matthews Coefficient is" $tempfile | nawk '{print $NF}' | tail -1`
set SOLC = `grep "protein density is" $tempfile | nawk '{printf "%d", $NF}' | tail -1`
rm -f $tempfile >> /dev/null


# estimate sites from sequence? 
# Only when SITES is still empty and the sequence file has content.
# Each branch sums the leading number of the sequence-file lines ending in
# the residue tag and multiplies by CHAINS; a zero count leaves SITES empty.
set temp = `cat ${tempfile}sequence | wc -l`
if(("$SITES" == "")&&("$temp" > 0)) then
    if(("$METAL" == "Selenium")||("$METAL" == "Platinum")) then
	# use methionine count as "sites"
	set temp = `cat ${tempfile}sequence | nawk '/met$/{met += $0} END{print met+0}'`
	if("$temp" != "0") set SITES = `echo $temp $CHAINS | nawk '{print $1*$2}'`
	rm -f ${tempfile}
    endif
    if("$METAL" == "Mercury") then
	# use cysteine count as "sites"
	set temp = `cat ${tempfile}sequence | nawk '/cys$/{cys += $0} END{print cys+0}'`
	if("$temp" != "0") set SITES = `echo $temp $CHAINS | nawk '{print $1*$2}'`
	rm -f ${tempfile}
    endif
    if("$METAL" == "Gold") then
	# use histidine count as "sites"
	set temp = `cat ${tempfile}sequence | nawk '/his$/{his += $0} END{print his+0}'`
	if("$temp" != "0") set SITES = `echo $temp $CHAINS | nawk '{print $1*$2}'`
	rm -f ${tempfile}
    endif
endif

# guess at number of sites that would be "feasible"
# Runs only when SITES is still empty.  SITES starts out as one site per
# chain; the metal/wavelength based estimate below can only replace it if
# SITES is empty when the loop runs.
if("$SITES" == "") then
    # just one seems too trivial
    set SITES = $CHAINS
    # estimate number of sites that would be "doable"
    
    # metal should always be set
    if("$METAL" != "unknown") then
	# get wavelength of interest
	# (converted with 12398.4245/wavelength; 8014 is used when the stats
	#  file provides no usable WAVE line)
	set temp = `nawk '/^WAVE/ && $NF+0 > 0 {print 12398.4245/$NF}' ${tempfile}stats`
	if("$temp" == "") set temp = 8014
	
	# get number of electrons in this metal
	# The last awk guards every divisor: a zero signal is replaced by a
	# huge one so the corresponding site count comes out as 0 instead of
	# aborting awk with a division by zero.  (Each guard must patch its
	# own field; patching $1 for a zero $2 or $3 leaves the zero divisor.)
	echo $METAL $temp | nawk -f ${tempfile}elements.awk |\
	nawk -v mass=$ASU '{k=1/((7^2)*mass/14); \
	print 100*k*($1+$5)^2, 100*k*$6*$6, 100*k*$5*$5, $4}' |\
	nawk '{if($1+0 == 0) $1=99999; \
	       if($2+0 == 0) $2=99999; \
	       if($3+0 == 0) $3=99999; \
	       printf "%d %d %d", 1/$2, 1/$3, 10/$1 }' >! ${tempfile}
	# get sites for 1% Ranom, Rdisp, or 10% Riso
	set temp = `cat $tempfile`
	rm -f ${tempfile}
	
	# update non-trivial numbers of sites
	# NOTE(review): SITES was just set to CHAINS above, so the
	# "$SITES" == "" test below normally never passes and the estimates
	# are ignored -- confirm whether that is intended.
	foreach possibility ( $temp )
	    if(("$SITES" == "")&&("$possibility" > $CHAINS)&&("$possibility" < 30)) then
		# take the single candidate, not the whole candidate list
		set SITES = "$possibility"
	    endif
	end
    endif
endif

# don't need this file anymore
rm -f ${tempfile}stats >& /dev/null

# skip questionnaire first time through?
# (while FIRSTIME is set, the Elvish file is discarded and control jumps
#  straight to the Calculate label)
if($?FIRSTIME) then
    rm -f ${tempfile}elvish >& /dev/null
    goto Calculate
endif

Questionaire:
###############################################################################

  ####   #    #  ######   ####    #####     #     ####   #    #    ##       #   #####   ######
 #    #  #    #  #       #          #       #    #    #  ##   #   #  #      #   #    #  #
 #    #  #    #  #####    ####      #       #    #    #  # #  #  #    #     #   #    #  #####
 #  # #  #    #  #            #     #       #    #    #  #  # #  ######     #   #####   #
 #   #   #    #  #       #    #     #       #    #    #  #   ##  #    #     #   #   #   #
  ### #   ####   ######   ####      #       #     ####   #    #  #    #     #   #    #  ######

###############################################################################
#
#   Question user about parameters that were mentioned, but not explicitly
#   initialized by ginger
#
###############################################################################

# space group: ask only when SYMM was mentioned with a value that is not a
# recognizable space-group symbol
grep "SYMM" ${tempfile}elvish >& /dev/null
if(! $status) then
    set temp = `nawk '/^SYMM/{print $NF}' ${tempfile}elvish`
    if("$temp" != "") then
	# check this word against the SG library
	if( -e $CLIBD/symop.lib) then
	    set temp = `nawk -v SG=$temp '$4 == toupper(SG) {print $4}' $CLIBD/symop.lib | head -1`
	endif
	if("$temp" !~ [PpCcIiFfRrHh][1-6]*) then
	    # must not have been initialized, 
	    # ask user about space group
	    # (current SG is offered as the default answer)
	    set temp = "$SG"
	    echo "What is your space group? [$temp]"
	    echo -n "$PROMPT"
	    if(! $?AUTO) then
		echo -n "$BELL"
		set in = "$<"
		if("$in" != "") set temp = "$in"
	    else
		# automatic mode: accept the default
		echo "$temp"
	    endif

	    # check again
	    if( -e $CLIBD/symop.lib) then
		set temp = `nawk -v SG=$temp '$4 == toupper(SG) {print $4}' $CLIBD/symop.lib | head -1`
	    endif
	    if("$temp" =~ [PpCcIiFfRrHh][1-6]*) then
		set SG = "$temp"
	    else
		# chance to undefine SG
		if("$temp" =~ unk*) then
		    set SG = unknown
		else
		    echo "Sorry, but "\"$temp\"" is not a space group. "
		    echo "P212121 is a space group. "
		    echo "(You could also say: "\""unknown space group"\"" ) "
		endif
	    endif
	endif
	set understood = "$understood $temp"
    endif
endif


# unit cell
# Ask only when CELL was mentioned without any value (a bare CELL line).
grep "CELL" ${tempfile}elvish >& /dev/null
if(! $status) then
    # see if cell was given
    set temp = `nawk '/^CELL/{print substr($0,5)}' ${tempfile}elvish`
    if("$temp" == "") then
	# current cell is offered as the default answer
	set temp = "$CELL"
	# something is wrong with the cell, but we don't know what
	echo "What is your unit cell? [$temp]"
	echo -n "$PROMPT"
	if(! $?AUTO) then
	    echo -n "$BELL"
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	else
	    # automatic mode: accept the default
	    echo "$temp"
	endif
	
	# now check/correct this (potential) cell against the space group
	# (crystal system from column 6 of symop.lib is appended as last word)
	set temp = "$temp "`nawk -v SG="$SG" '$4 == toupper(SG) {print $6}' $CLIBD/symop.lib |& head -1`
	echo "$temp" |\
	nawk '{a=$1+0; b=$2+0; c=$3+0; A=$4+0; B=$5+0; G=$6+0}\
	$NF == "MONOCLINIC" { A=90; ; G=90; if($4+0 > 5) B=$4+0; if($5+0 > 5) B=$5+0}\
	$NF == "ORTHORHOMBIC" {A=90; B=90; G=90}\
	$NF == "TETRAGONAL" || $NF == "TRIGONAL" || $NF == "HEXAGONAL" {\
	b=a; A=90; B=90; G=120;\
	if((c==0) && (($2-a)^2 > .0001)) {c = $2+0}}\
	$NF == "TETRAGONAL" {G=90}\
	$NF == "CUBIC" {b=a; c=a; A=90; B=90; G=90}\
	END{if(a>5 && b>5 && c>5 && A>5 && B>5 &&G>5) print a, b, c, A, B, G}' |\
	cat >! ${tempfile}
	set temp = `cat ${tempfile}`
	rm -f ${tempfile}
	if($#temp == 6) then
	    # enough info was available to get cell
	    set CELL = `echo "$temp"`
	else
	    # chance to undefine cell
	    # NOTE(review): "in" is only assigned in the interactive branches
	    # visible here; in automatic mode it may be stale or unset
	    if("$in" =~ unk*) then
		set CELL = "unknown"
	    else
		echo "Sorry, but "\"$in\"" is not a unit cell. "
		echo "This is a unit cell: 89.1 89.1 47.5 90 90 120 "
		echo "(you could also have said "\"unknown\"")"
	    endif
	endif
	set understood = "$understood $temp"
    endif
endif


# resolution
# Two questions: the high-resolution limit (asked when the RESO value is
# not numeric) and the low-resolution limit (asked only when a non-empty,
# non-numeric loRESO value is present).  Answers are parsed by ginger.awk.
grep "RESO" ${tempfile}elvish >& /dev/null
if(! $status) then
    # see if value was given
    set temp = `nawk '/^RESO/{print $NF}' ${tempfile}elvish`
    if("$temp" !~ [0-9]*) then
	# current high-resolution limit is the default answer
	set temp = "$hiRES"
	# something is wrong with this parameter, but we don't know what
	echo "How far could this crystal possibly diffract? [$temp A]"
	echo -n "$PROMPT"
	if(! $?AUTO) then
	    echo -n "$BELL"
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	else
	    echo "$temp"
	endif
	# let ginger interpret the answer
	set temp = `echo "RESO $temp" | nawk -f ${tempfile}ginger.awk | nawk '/^RESO/{print $NF}'`
	if("$temp" =~ [0-9]*) then
	    set hiRES = "$temp"
	    set USER_hiRES = "$temp"
	else
	    echo "Sorry, but "\"$in\"" doesn't tell us anything. "
	    echo "Resolution limit is probably 10 - 0.4 Angstroms. "
	endif
	set understood = "$understood $temp"
    endif
    # see if other value was given
    set temp = `nawk '/^loRESO/{print $NF}' ${tempfile}elvish`
    if(("$temp" !~ [0-9]*)&&("$temp" != "")) then
	# current low-resolution limit is the default answer
	set temp = "$loRES"
	# something is wrong with this parameter, but we don't know what
	echo "What is the lowest (smallest angle) resolution measured? [$temp A]"
	echo -n "$PROMPT"
	if(! $?AUTO) then
	    echo -n "$BELL"
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	else
	    echo "$temp"
	endif
	# let ginger interpret the answer
	set temp = `echo "low RESO $temp" | nawk -f ${tempfile}ginger.awk | nawk '/^loRESO/{print $NF}'`
	if("$temp" =~ [0-9]*) then
	    set loRES = "$temp"
	else
	    echo "Sorry, but "\"$in\"" doesn't tell us anything. "
	    echo "Low-Resolution limit is probably > 10 Angstroms. "
	endif
	set understood = "$understood $temp"
    endif
endif


# Matthews number/solvent content
# Ask only when VM was mentioned without a numeric value.  A valid answer
# becomes the new Vm and, exactly as in the command-line pass, invalidates
# the current chain count so that it is re-derived from Vm later.
grep "VM " ${tempfile}elvish >& /dev/null
if(! $status) then
    # see if value was given
    set temp = `nawk '/^VM/{print $NF}' ${tempfile}elvish`
    if("$temp" !~ [0-9]*) then
	# current Vm is the default answer
	set temp = "$Vm"
	# something is wrong with this parameter, but we don't know what
	echo "What is this crystal's Matthews number/solvent content? [$temp]"
	echo -n "$PROMPT"
	if(! $?AUTO) then
	    echo -n "$BELL"
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	else
	    echo "$temp"
	endif
	
	# check user input with ginger
	set temp = `echo "Vm $temp" | nawk -f ${tempfile}ginger.awk | nawk '/^VM/{print $NF}'`
	if("$temp" =~ [0-9]*) then
	    set Vm = "$temp"
	    # forget the chain count, recalculate it later
	    # (the reset must not depend on SITES being known; only the
	    #  per-chain site rescaling does)
	    if("$CHAINS" != "") then
		if("$SITES" != "") then
		    # reset number of sites to monomer count
		    set temp = `echo "$SITES $CHAINS" | nawk '$2+0 > 0 {printf "%d", $1/$2}'`
		    if("$temp" > 0) set SITES = $temp
		endif
		# reset mass if Vm was explicitly specified
		if("$CHAINS" == 1) set MASS = "unknown"
		# redetermine chain count
		set CHAINS = ""
	    endif
	    set understood = "$understood $temp"
	else
	    echo "Sorry, but "\"$in\"" doesn't tell us anything. "
	    echo "Matthews numbers usually range from 1.2 to 5. "
	    echo "Solvent contents usually range from 45% to 80%. "
	endif
	set understood = "$understood $temp"
    endif
endif

# Protein MASS size
# Ask only when MASS was mentioned without a numeric value.  The answer
# (or the current MASS as default) is parsed by ginger.awk.
grep "MASS" ${tempfile}elvish >& /dev/null
if(! $status) then
    # see if an actual mass value was given
    set temp = `nawk '/^MASS/{print $NF}' ${tempfile}elvish`
    if("$temp" !~ [0-9]*) then
	# something is wrong with this parameter, but we don't know what
	set temp = "$MASS"
	echo "What is the molecular weight of your protein? [$temp g/mol]"
	echo -n "$PROMPT"
	if(! $?AUTO) then
	    echo -n "$BELL"
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	else
	    # automatic mode: keep the default.  The reply variable is not
	    # consulted here, since it would be unset or hold the answer to
	    # an earlier question.
	    echo "$temp"
	endif

	# now check user input with Ginger
	set temp = `echo "MASS $temp" | nawk -f ${tempfile}ginger.awk | nawk '/^MASS/{print $NF}'`
	if("$temp" =~ [0-9]*) then
	    set MASS = "$temp"
	else
	    echo "Sorry, but "\"$in\"" doesn't tell us anything. "
	    echo "Protein is probably > 1000 g/mol. "
	endif
	set understood = "$understood $temp"
    endif
endif

# ASU size/chain number
# Ask only when ASU was mentioned without a numeric value.  Answering
# "pick" or 0 clears CHAINS so that the count is re-guessed.
grep "ASU " ${tempfile}elvish >& /dev/null
if(! $status) then
    # see if value was given
    set temp = `nawk '/^ASU/{print $NF}' ${tempfile}elvish`
    if("$temp" !~ [0-9]*) then
	# current chain count (or 1) is the default answer
	set temp = "$CHAINS"
	if("$CHAINS" == "") set temp = "1"
	# something is wrong with this parameter, but we don't know what
	echo "How many chains do you expect in the asymmetric unit? [$temp/pick for Vm=$Vm]"
	echo -n "$PROMPT"
	if(! $?AUTO) then
	    echo -n "$BELL"
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	else
	    echo "$temp"
	endif
	# let ginger interpret the answer
	set temp = `echo "ASU $temp" | nawk -f ${tempfile}ginger.awk | nawk '/^ASU/{print $NF}'`
	if("$temp" =~ [0-9]*) then
	    # most likely need to update metal sites too
	    # (sites per old chain, times the new number of chains)
	    if(("$CHAINS" > 1)&&("$SITES" != "")) then
		set SITES = `echo "$SITES $CHAINS $temp" | nawk '$2 > 0{printf "%d", ($1/$2) * $3}'`
	    endif
	    set CHAINS = "$temp"
	else
	    # a few more possibilities
	    # NOTE(review): "in" is only assigned in the interactive branches
	    # visible here; in automatic mode it may be stale or unset
	    if(("$in" =~ *pick*)||(" $in " =~ " 0 ")) then
		# re-guess number of chains
		set CHAINS = ""
	    else
		echo "Sorry, but "\"$in\"" doesn't tell us anything. "
		echo "Sometimes, crystals have two or more protein chains in the asymmetric unit. "
	    endif
	endif
	set understood = "$understood $temp"
    endif
endif

# number of metal sites
# Ask only when SITES was mentioned without a numeric value.
grep "SITES" ${tempfile}elvish >& /dev/null
if(! $status) then
    # see if value was given
    set temp = `nawk '/^SITES/{print $NF}' ${tempfile}elvish`
    if("$temp" !~ [0-9]*) then
	# current site count is the default answer
	set temp = "$SITES"
	# something is wrong with this parameter, but we don't know what
	echo "How many metal sites do you expect per protein? [$temp]"
	echo -n "$PROMPT"
	if(! $?AUTO) then
	    echo -n "$BELL"
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	else
	    echo "$temp"
	endif
	# let ginger interpret the answer
	set temp = `echo "SITES $temp" | nawk -f ${tempfile}ginger.awk | nawk '/^SITES/{print $NF}'`
	if("$temp" =~ [0-9]*) then
	    # see if user mentioned protein mass in the same breath
	    set SITES = "$temp"
	    grep "MASS" ${tempfile}elvish >& /dev/null
	    if(! $status) then
		# sites are probably per protein, not per ASU
		# (multiply by the chain count when the product exceeds 1)
		if("$CHAINS" != "") then
		    set temp = `echo "$SITES $CHAINS" | nawk '$1*$2 > 1 {printf "%d", $1*$2}'`
		    if("$temp" != "") set SITES = "$temp"
		endif
	    endif
	else
	    echo "Sorry, but "\"$in\"" doesn't tell us anything. "
	    echo "You probably have 1-50 metal sites. "
	endif
	set understood = "$understood $temp"
    endif
endif


# the Elvish translation has been fully consumed
rm -f ${tempfile}elvish >& /dev/null



# check for completely incomprehensible input
# (nothing was recorded in "understood" although the first input line is
#  not empty)
set input = `head -1 ${tempfile}input`
rm -f ${tempfile}input
if(("$understood" == "")&&("$input" != "")) then
    # default answer: the input meant nothing
    set temp = "nothing"
    echo "Um, what, exactly, do you mean by "\"$input\"" [$temp]? "
    echo -n "$PROMPT"
    if(! $?AUTO) then
	echo -n "$BELL"
	set in = "$<"
	if("$in" != "") set temp = "$in"
    else
	echo "$temp"
    endif
    # any other answer replaces the input and is processed from the
    # Gather label again
    if("$temp" != "nothing") then
	set input = "$temp"
	goto Gather
    endif
endif




Calculate:
###############################################################################

  ####     ##    #        ####   #    #  #         ##     #####  ######
 #    #   #  #   #       #    #  #    #  #        #  #      #    #
 #       #    #  #       #       #    #  #       #    #     #    #####
 #       ######  #       #       #    #  #       ######     #    #
 #    #  #    #  #       #    #  #    #  #       #    #     #    #
  ####   #    #  ######   ####    ####   ######  #    #     #    ######

###############################################################################
#
#   (re)calculate interdependent parameters
#
###############################################################################
# Nothing can be calculated without data: when the run file lists no
# "run" lines at all, go and search for mtz files instead.
set temp = `nawk '/^run/' $RUNFILE | wc -l`
if($temp < 1) then
    echo "no data, we will look for some."
    goto MTZsearch
endif

# second column of the symop.lib line matching the space group is used as
# the number of asymmetric units per cell; default to 1 when nothing matches
set ASU_per_CELL = `nawk -v SG=$SG '$4 == toupper(SG) {print $2}' $CLIBD/symop.lib |& head -1`
if("$ASU_per_CELL" == "") set ASU_per_CELL = 1

# make unit cell consistent with space group (redundant here)
# check against SG
# (column 6 of the symop.lib line is the crystal system name; it is appended
#  to the cell so the awk rules below can key on the last field)
set temp = `nawk -v SG="$SG" '$4 == toupper(SG) {print $6}' $CLIBD/symop.lib |& head -1`
echo "$CELL $temp" |\
nawk '{a=$1+0; b=$2+0; c=$3+0; A=$4+0; B=$5+0; G=$6+0}\
    $NF == "MONOCLINIC" { A=90; ; G=90; if($4+0 > 5) B=$4+0; if($5+0 > 5) B=$5+0}\
    $NF == "ORTHORHOMBIC" {A=90; B=90; G=90}\
    $NF == "TETRAGONAL" || $NF == "TRIGONAL" || $NF == "HEXAGONAL" {\
    b=a; A=90; B=90; G=120;\
    if((c==0) && (($2-a)^2 > .0001)) {c = $2+0}}\
    $NF == "TETRAGONAL" {G=90}\
    $NF == "CUBIC" {b=a; c=a; A=90; B=90; G=90}\
    END{if(a>5 && b>5 && c>5 && A>5 && B>5 &&G>5) print a, b, c, A, B, G}' |\
cat >! ${tempfile}
set temp = `cat ${tempfile}`
rm -f ${tempfile}
# the corrected cell is only adopted when all six values passed the > 5 test
if("$#temp" == 6) then
    set CELL = `echo $temp`
endif

# unit cell volume from the six cell parameters:
# V = a*b*c*sqrt(|1 + 2 cosA cosB cosG - cosA^2 - cosB^2 - cosG^2|)
# (nothing is printed unless CELL has exactly six words)
echo $CELL |\
nawk 'NF==6{s=3.1415926535897899419/180; A=cos(s*$4); B=cos(s*$5); G=cos(s*$6); \
 skew = 1 + 2*A*B*G - A*A - B*B - G*G ; if(skew < 0) skew = -skew;\
 printf "%.3f\n", $1*$2*$3*sqrt(skew)}' >! ${tempfile}volume
set CELLvolume = `cat ${tempfile}volume`
rm -f ${tempfile}volume >> /dev/null


# guess at Vm, if we don't know
if("$Vm" == "") set Vm = 2.4

# compute ASU mass consistent with this Vm
# (cell volume / asymmetric units per cell / Vm; 30000 when that fails)
set ASU = `echo "$CELLvolume $ASU_per_CELL $Vm" | nawk '$2+0>0 && $3+0>0{print ($1/$2) / $3}'`
if("$ASU" == "") set ASU = "30000"
# residue estimate: ASU mass divided by 120
set NRES = `echo "$ASU" | nawk '{printf "%d", ($1/120)}'`

if("$MASS" == "unknown") then
    # set mass to be consistent with Vm
    set MASS = "$ASU"
    set CHAINS = 1
endif

if("$CHAINS" == "") then
    # we have a mass, but don't know how many chains
    
    # decide to round up or down
    # (temp is 1 when the remainder of ASU/MASS is at least half a MASS)
    set temp = `echo "$ASU $MASS" | nawk '($1 % $2) >= $2/2{print 1}'`

    # compute chain count most consistent with Vm
    set CHAINS = `echo "$ASU $MASS $temp" | nawk '{printf "%d", ($1/$2)+$3}'`
    echo "$CHAINS chains fit best with Vm = $Vm"
    
    # probably want to reset number of sites too
    # (SITES is treated as per-chain here and multiplied by the new count)
    if("$SITES" != "") then
	set SITES = `echo "$SITES $CHAINS" | nawk '{printf "%d", $1 * $2}'`
    endif
endif


CheckAll:
###############################################################################

  ####   #    #  ######   ####   #    #            ##    #       #
 #    #  #    #  #       #    #  #   #            #  #   #       #
 #       ######  #####   #       ####            #    #  #       #
 #       #    #  #       #       #  #            ######  #       #
 #    #  #    #  #       #    #  #   #           #    #  #       #
  ####   #    #  ######   ####   #    #          #    #  ######  ######

###############################################################################
#
#	Last-minute checks on sensitive parameters
#
###############################################################################

# (re)calculate final Vm
# ASU mass = chain count times per-chain mass; NRES = that mass / 120
set ASU = `echo "$CHAINS $MASS" | nawk '{print $1 * $2}'`
set NRES = `echo "$ASU" | nawk '{printf "%d", ($1/120)}'`
# run matthews_coef on the current cell, space group and ASU mass, keeping
# everything it prints (stdout and stderr) in $tempfile
matthews_coef << end_mat >&! $tempfile
CELL $CELL
SYMM $SG
molweight $ASU
END
end_mat
# Vm   = last word of the last "Matthews Coefficient is" line
# SOLC = last word of the last "protein density is" line, truncated to an integer
set Vm = `grep "Matthews Coefficient is" $tempfile | nawk '{print $NF}' | tail -1`
set SOLC = `grep "protein density is" $tempfile | nawk '{printf "%d", $NF}' | tail -1`
rm -f $tempfile >> /dev/null




# load "ADD" values into awk program, for back-calculating frame numbers later
# The generated ${tempfile}unadd.awk gets one rule per frame: for every
# $RUNFILE line whose 3rd field is "+", each number b from field 4 to field 6
# yields a rule that matches b + (field 2) and prints "b in <last field>".
echo "#! $nawk -f" >! ${tempfile}unadd.awk
echo "# $TITLE awk program for retrieving original frame number/file" >> ${tempfile}unadd.awk
echo "# from Scaler-created mtzs" >> ${tempfile}unadd.awk
cat $RUNFILE |\
nawk '$3=="+"{add=$2;for(b=$4;b<=$6;++b){printf "$1 == %d {print \"%d in %s\"}\n", b+add, b, $NF}}' |\
cat >> ${tempfile}unadd.awk
chmod a+x ${tempfile}unadd.awk



###############################################################################
# check the rules file for bad cards (by running it by scala)
###############################################################################

# insert SDCORR into rules file
# the rebuilt rules file starts with the last line of $RULESFILE that
# mentions SDCORR
grep "SDCORR" $RULESFILE | tail -1 >! ${tempfile}rules
# NOTE(review): $status is read after a two-stage pipeline here; which stage
# it reflects may depend on the csh flavor in use -- confirm
if($status) then
    # no SDCORR card!
    echo "SDCORR $defaultSDCORR"  >! ${tempfile}rules
endif
# append the other cards: only lines with more than one word are copied,
# each cut off at the first word that starts with "#"; duplicates are
# removed and every line mentioning SDCORR (any case) is left out
cat $RULESFILE |\
nawk 'NF>1{for(i=1;i<=NF && ($i !~ /^#/);++i){printf "%s ", $i}; print ""}' |\
sort -u  | grep -iv "SDCORR" >> ${tempfile}rules
mv ${tempfile}rules $RULESFILE


# make a meaningless hkl file
# (a single reflection; it only exists so that scala has an input file to
# open while the cards are being checked below)
echo "   1   1   1  1  1 " >! ${tempfile}.hkl

# create an mtz from it
f2mtz hklin ${tempfile}.hkl hklout ${tempfile}prepme.mtz << EOF >& /dev/null
TITLE dummy mtz for testing scala
SYMM 1
CELL 100 100 100 90.00 90.00 90
LABOUT H K L I SIGI
CTYPOUT H H H R R
SKIP 0
END
EOF

# make it a multirecord MTZ
rotaprep  hklin ${tempfile}prepme.mtz  hklout ${tempfile}sortme.mtz << EOF >& /dev/null
input mtzi
batch 1
labin I=I SIGI=SIGI
EOF

# sort it
# ($USE_VRSET is pasted directly in front of the VRSET keyword)
sortmtz hklin ${tempfile}sortme.mtz hklout ${tempfile}dummy.mtz << EOF >& /dev/null
${USE_VRSET}VRSET -9E+38
H K L M/ISYM BATCH I SIGI
EOF

# the intermediate files are no longer needed; only ${tempfile}dummy.mtz is kept
rm -f ${tempfile}.hkl
rm -f ${tempfile}prepme.mtz
rm -f ${tempfile}sortme.mtz
#
# Loop: feed the rules file to scala on the dummy mtz, collect the cards that
# scala flags, strip them from $RULESFILE, and repeat until nothing is
# flagged or 20 passes have been made.
set badcards = 1
# don't bother checking cards again (if we already checked)
if(! $?CHECK_SCALA_CARDS) set badcards = 0
set catchloop = 20
while(($badcards > 0)&&($catchloop > 0))
    # make temporary, do-nothing input file
    cat $RULESFILE >! ${tempfile}in
    echo "INIT NONE" >> ${tempfile}in
    echo "OUTPUT NONE" >> ${tempfile}in
    echo "NOSCALE" >> ${tempfile}in
    
    # feed this file into scala
    cat ${tempfile}in |\
    scala hklin ${tempfile}dummy.mtz  hklout /dev/null rogues /dev/null >&! ${tempfile}log
#    if($status) set catchloop = 0

    # look for rejected keywords
    # (between " Data line" and " Run number": a line whose first word
    # contains "*" (octal 052) marks the most recent " Data line" text,
    # taken from column 15 onward, as bad)
    cat ${tempfile}log |\
    nawk '/^ Data line/,/ Run number/' |\
    nawk '$1 ~ /[\052]/{print "BADCARD " last} /^ Data line/{last = substr($0,15)}' |\
    cat >! ${tempfile}badcards
    # support html logfile
    # (here the line just before the "*" line is taken as the bad card)
    cat ${tempfile}log |\
    nawk '/Input keyworded commands/,/Contents/' |\
    nawk '$1 ~ /[\052]/{print "BADCARD " last} {last = $0}' |\
    nawk 'NF > 1' >> ${tempfile}badcards

    set badcards = `cat ${tempfile}badcards | wc -l`

    if($badcards) then
	echo "eliminating: "
	nawk '{print substr($0, 9)}' ${tempfile}badcards | sort -u
	echo "scala did not accept these keywords"
    endif

    # remove the rejected keywords (based on word-wise comparison)
    # a rules line is dropped when each of its words equals the word at the
    # same position in one of the bad cards
    cat ${tempfile}badcards $RULESFILE |\
    nawk '/^BADCARD/{++N;badcard[N]=substr($0,8);}\
     ! /^BADCARD/{okay=1; for(n in badcard){split(badcard[n],badword);\
       isbad=1; for(i=1;i<=NF;++i){if(badword[i] != $i){isbad=0}}; if(isbad) okay=0} \
                  if(okay) print}' |\
    cat >! ${tempfile}newcards
    mv ${tempfile}newcards $RULESFILE 
    
    # try again if there was a problem
    @ catchloop = ( $catchloop - 1 )
end


# The card-checking loop used up all of its passes: decide whether scala
# itself is broken or whether some card simply could not be identified.
if($catchloop < 1) then
    # check to see if scala simply won't run
    # (same do-nothing input as in the loop, but without any of the rules)
    echo "INIT NONE" >! ${tempfile}in
    echo "OUTPUT NONE" >> ${tempfile}in
    echo "NOSCALE" >> ${tempfile}in

    # feed this file into scala
    cat ${tempfile}in |\
    scala hklin ${tempfile}dummy.mtz  hklout /dev/null rogues /dev/null >&! ${tempfile}log
    if($status) then
	# cards weren't the problem. Scala just can't run
	cat << EOF
WARNING: unable to run scala on `hostname -s`
	this could be due to insufficient memory, write-protected disks, 
	or just absence of the scala program.  The elves will go ahead and
	set up scala scripts, but they aren't going to run until scala is
	fixed (ask your SysAdmin (root) if you don't know how to do this).
EOF
	set NORUN
    else
	# couldn't remove some kind of bad card
	echo "WARNING: one of the following SCALA commands is bad, but we "
	echo "         can't tell which.  They will all be commented out."
	cat $RULESFILE
	# prefix every card with "#", going through a scratch copy
	mv $RULESFILE ${tempfile}
	nawk '{print "#" $0}' ${tempfile} >! $RULESFILE
	# do not leave the scratch copy behind
	rm -f ${tempfile}
    endif
endif

# clean up
# (scratch files of the card check; CHECK_SCALA_CARDS is unset so that the
# check loop above is skipped on a later pass)
rm -f ${tempfile}dummy.mtz
rm -f ${tempfile}badcards
rm -f ${tempfile}log
rm -f ${tempfile}in
unset CHECK_SCALA_CARDS

# (re)insert SDCORR into rules file
# if no line of $RULESFILE mentions SDCORR any more, put the default card
# on the first line
grep "SDCORR" $RULESFILE >& /dev/null
if($status) then
    # no SDCORR card!
    mv $RULESFILE ${tempfile}rules
    echo "SDCORR $defaultSDCORR"  >! $RULESFILE
    grep -v "SDCORR" ${tempfile}rules >> $RULESFILE
    rm -f ${tempfile}rules
endif





#########################################
# now update labels in the run file     #
#########################################
# Pair the i-th word of $wavelengths with the i-th word of $wavenames, one
# "value name" pair per line.
echo "$wavelengths $wavenames" |\
nawk '{for(i=1;i<=NF/2;++i){print $i, $(i+(NF/2))}}' |\
cat >! ${tempfile}labels

# Rewrite field 3 of every "wavelength" line in $RUNFILE with the name that
# was paired with the numeric value found in field 5.  The two-field pair
# lines only feed the lookup table and are not copied to the output.
cat ${tempfile}labels $RUNFILE |\
nawk 'NF==2{label[$1+0]=$2} $2=="wavelength"{$3 = label[$5+0]}\
 NF != 2{print}' >! ${tempfile}runs
mv ${tempfile}runs $RUNFILE

rm -f ${tempfile}labels





# get alternative space groups (Patterson and index equivalent)
if(-e $CLIBD/symop.lib) then
    # Group the symop.lib entries (number below 500, 5th field starting with
    # "PG", no "m" or "bar" anywhere on the line) by first letter of the
    # name plus 5th field, then print the group that $SG belongs to.
    # $SG is quoted and upper-cased here so that this lookup agrees with the
    # symop.lib lookup done on the unit cell earlier in the script.
    cat $CLIBD/symop.lib |\
    nawk -v SG="$SG" '$5 ~ /^PG/ && ! /m/ && ! /bar/ && $1 < 500 {sys = substr($4, 1, 1); \
    PG[$4]=sys $5; SGs[sys $5] = SGs[sys $5] " " $4} \
    END{print SGs[PG[toupper(SG)]]}' >! ${tempfile}

    set otherSGs = `cat ${tempfile}`

    # add on (potentially) unsupported orthorhombics
    if("$otherSGs" =~ *P222*) then
	set otherSGs = `echo "$otherSGs P2212 P2122 P21221 P22121"`
    endif
    # the C222 counterparts (C2122 C2212) were commented out in the original
    # and are still not added

    rm -f ${tempfile}
endif






Report:
###############################################################################

 #####   ######  #####    ####   #####    #####
 #    #  #       #    #  #    #  #    #     #
 #    #  #####   #    #  #    #  #    #     #
 #####   #       #####   #    #  #####      #
 #   #   #       #       #    #  #   #      #
 #    #  ######  #        ####   #    #     #

###############################################################################
#
#   Create presentation for user's approval
#
###############################################################################

# Print a summary of everything that is about to be used, so the user can
# approve or correct it at the checkpoint that follows.
echo ""
echo "Scaler elves will localscale and merge your data with"
echo ""
if($#wavenames != 1) echo "$#wavenames wavelengths:"
if($#wavenames == 1) echo "$#wavenames wavelength:"
# one line per "wavelength" entry of $RUNFILE: value (field 5) and name (field 3)
# NOTE(review): \305 is printed right after the value -- presumably the
# Angstrom sign in an 8-bit character set; confirm
if($?USER_REFERENCE) then
    cat $RUNFILE |\
    nawk  '$2=="wavelength"{printf "%.5f \305  \"%s\"\n", $5, $3;}'
    echo "reference set is $refMTZ"
else
    if("$wave_reference" == "alldata") echo "reference set will be all data"
    # flag the wavelength whose name equals $wave_reference
    cat $RUNFILE |\
    nawk  -v ref=$wave_reference '$2=="wavelength"{\
      printf "%.5f \305  \"%s\"", $5, $3; \
      if($3==ref){print "    <- reference"}else{print ""}}'
endif
echo ""
# format mtz info for user approval
# (a table of first frame, last frame, file and wavelength name for every
# "+" line, with column widths taken from the longest entry)
cat $RUNFILE | \
nawk '$2=="wavelength"{name = $3} \
$3=="+"{++n; mtz[n]=$NF; label[n]=name; first[n]=$4; last[n]=$6; \
       if(w1 < length(first[n]))  w1=length(first[n]); \
       if(w2 < length(last[n]))  w2=length(last[n]); \
       if(w3 < length(mtz[n]))  w3=length(mtz[n]);}\
    END {printf "%"w1"s%"w2"s   %-"w3"s  as wavelength\n", "", "frames", "from file";\
    for(i=1;i<=n;++i){ \
    printf "%"w1"d to %"w2"d  %-"w3"s  %s\n", first[i], last[i], mtz[i], label[i];}}'

if($?KEEP_ALL_FRAMES) echo "Elves will not reject any of these frames"
echo ""
echo "The following SCALA cards will be used:"
cat $RULESFILE
#echo "(see $CCP4/doc/scala.doc about what they mean.)"

echo ""
echo "Resolution : $hiRES $ANG - $loRES $ANG"
echo "Space group: $SG"
echo "Unit Cell  : $CELL"
echo "        Vm : $Vm (${SOLC}% solvent)"
echo "Asymmetric unit contains:"
echo "1/$ASU_per_CELL of unit cell, "
echo -n "$ASU Da of protein, "
# the per-chain breakdown is only shown when there is more than one chain
if("$CHAINS" > 1) echo -n "( $CHAINS $MASS Da chains )"
echo ""
echo "$SITES $METAL sites. (expected)"
echo ""







# master checkpoint
# Ask for approval of the report above.  With $AUTO set, the default answer
# is echoed and used without reading the terminal.
unset FIRSTIME
set temp = "Yes"
echo "Everything look okay? [$temp]"
echo -n "$PROMPT"
if($?AUTO) then
    echo "$temp"
else
    echo -n "$BELL"
    set in = "$<"
    if("$in" != "") set temp = "$in"
endif
# split the answer into words
set temp = `echo $temp`

# catch unexpected replies
# (anything other than a single word beginning with Y or y)
if(("$temp" !~ [Yy]*)||($#temp != 1)) then
    if(($#temp == 1)&&("$temp" =~ [Nn]*)) then
	# one word, began with "N"
	set temp = "nothing"
	echo "What's wrong? [$temp]?"
	if($?AUTO) then
	    echo "$temp"
	else
	    set in = "$<"
	    if("$in" != "") set temp = "$in"
	endif
	# the words quit, stop or exit (any case) end the session
	set quit = `echo " $temp " | egrep -i " quit | stop | exit "`
	if("$quit" != "") then
	    goto Cleanup
	endif
	# any other explanation is handed to the Gather section as new input
	if("$temp" != "nothing") then
	    set input = "$temp"
	    goto Gather
	endif
    else
	# not a yes and not a plain no: treat the reply itself as new input
	set input = "$temp"
	goto Gather
    endif
endif

# user said everything is okay!

echo "Good. Don't go away."


set FIRSTIME

# re-update wavelength info from the report file (in case it was changed)
set wavenames   = `nawk '$2=="wavelength"{print $3}' $RUNFILE`
set wavelengths = `nawk '$2=="wavelength"{print $5}' $RUNFILE`

MakeDirs:
###############################################################################
#
# create/verify required subdirectories
#
###############################################################################

# Make sure each output location exists as a directory.
foreach dir ( $LOG_dir $MTZ_dir $MAP_dir $SCRIPT_dir $SHARP_dir $SHELX_dir $SOLVE_dir $XPLOR_dir )
    # see if it's already done
    # -d follows symbolic links, so a link that points at an existing
    # directory counts as a directory and is left alone (parsing the first
    # character of "ls -ld" output reported such a link as a non-directory,
    # which got it renamed to .old and replaced by an empty directory)
    if((-e "$dir")&&(! -d "$dir")) then
	# a file is using this name
	echo "WARNING: we are moving your $dir to ${dir}.old"
	mv -f $dir ${dir}.old
    endif
    if(! -e "$dir") mkdir $dir
end


# now print out scripts consistent with the above values
echo "setting up scripts in ${SCRIPT_dir}/..."
# keep a copy of the frame-number lookup program with the other scripts
if(-e ${tempfile}unadd.awk) then
    cp ${tempfile}unadd.awk ${SCRIPT_dir}/unadd.awk >& /dev/null
endif

if($?DENZO_FILES) then
    # need this to prepare denzo files
    mv ${tempfile}x2york.awk ${SCRIPT_dir}/x2york.awk
    chmod a+x ${SCRIPT_dir}/x2york.awk
#    mv ${tempfile}strategy denzo.strategy
else
    # aren't going to need this
    rm -f ${tempfile}x2york.awk >& /dev/null
endif

# remove unneeded files
# (kept when $DEBUG is set)
if(! $?DEBUG) then
    rm -f ${tempfile}input         >& /dev/null
    rm -f ${tempfile}batches       >& /dev/null
#    rm -f ${tempfile}elements.awk  >& /dev/null
    rm -f ${tempfile}ginger.awk    >& /dev/null
    rm -f ${tempfile}labler.awk    >& /dev/null
    rm -f ${tempfile}parser.awk    >& /dev/null
    rm -f ${tempfile}unadd.awk     >& /dev/null
    rm -f ${tempfile}sequencer.awk >& /dev/null
    rm -f ${tempfile}sequence     >& /dev/null
#    rm -f ${tempfile}runlist      >& /dev/null
#    rm -f ${tempfile}x2york.awk   >& /dev/null
#    rm -f ${tempfile}xfiles       >& /dev/null
endif

# the Generate section writes the scripts listed below and is expected to
# come back via the Return_from_Generate label
goto Generate
# Uses:
$RUNFILE
$RULESFILE

# Info files
#${SCRIPT_dir}/runlist.txt
#${SCRIPT_dir}/rules.txt

# actual scaling scripts
#${SCRIPT_dir}/sort_everything.com
#${SCRIPT_dir}/make_reference_set.com
#${SCRIPT_dir}/import_reference.com
#${SCRIPT_dir}/rough_scale.com
#${SCRIPT_dir}/localscale.com
#${SCRIPT_dir}/merge.com
#${SCRIPT_dir}/extract.com
#${SCRIPT_dir}/scaleit.com

# utilities
#$SCRIPT_dir/FreeRer.com
#$SCRIPT_dir/mtz2various.com
#$SCRIPT_dir/SGsearch.com
#$SCRIPT_dir/reindex.com
#$SCRIPT_dir/bestFH.com
#$SCRIPT_dir/revise.com
#$SCRIPT_dir/mtz_sum.com
#$SCRIPT_dir/scala_summary.com
#$SCRIPT_dir/autoscala

Return_from_Generate:



# Set up SOLVE, (if we haven't already)
# Each of the three stanzas below jumps to a Setup_* section unless its
# *_SET_UP flag already exists; the Return_from_Setup_* label that follows
# each stanza is presumably where that section jumps back to (TODO confirm).
if((! $?SOLVE_SET_UP)&&(! $?NO_SOLVE)) then
    echo "setting up solve in ${SOLVE_dir}"
    goto Setup_SOLVE
endif
# ${SOLVE_dir}/spacegroup/solve.com
# ${SOLVE_dir}/mtz2SOLVE.com

Return_from_Setup_SOLVE:





# set up SHELX (if we haven't already)
if(! $?SHELX_SET_UP) then
    echo "setting up shelx in ${SHELX_dir}"
    goto Setup_SHELX
endif
#${SHELX_dir}/spacegroup/shelxs.ins, etc.
#${SHELX_dir}/SHELX2pdb.com
#${SHELX_dir}/mtz2SHELX.com
#${SHELX_dir}/SHELX.com

Return_from_Setup_SHELX:






# x-plor setup (just converter script right now)
if(! $?XPLOR_SET_UP) then
    echo "setting up X-PLOR in ${XPLOR_dir}"
    goto Setup_XPLOR
endif
#${XPLOR_dir}/mtz2XPLOR.com
Return_from_Setup_XPLOR:





# checkpoint??
# Only when $FIRSTIME is set: ask which of three strategies to follow.
# The answer sets or clears HURRY_UP (plus a few "already done" flags), or
# ends the session.
CheckPoint:
if($?FIRSTIME) then
    # don't need this anymore
    if(! $?DEBUG) rm -f ${tempfile}elements.awk  >& /dev/null

    cat << EOF >! ${tempfile}message
    
    Scaler Elves are now ready to start scaling your data.  Scripts
to run "solve" and "shelx" have been set up, but have no data
files yet.  Your raw data must be scaled and merged first.
    However, since scaling can take longer that you will probably 
want to hang around, there are a few questions we would like you
to answer now:

Would you like to:
1)  Have the Elves give you some merged data to look at ASAP.
2)  Use these scripts to process your data yourself, thankyouverymuch.
3)  Let the Elves edit the scripts, and improve parameters automatically.
EOF
    cat ${tempfile}message
    # default is 3, or 1 when HURRY_UP is already set
    set temp = "3"
    if($?HURRY_UP) set temp = "1"
    echo "Choose a strategy [$temp]"
    echo -n "$PROMPT"
    if($?AUTO) then
	echo "$temp"
    else
	echo -n "$BELL"
	set in = "$<"
	if("$in" != "") set temp = "$in"
    endif

    # check the actual message for clues
    # the reply is used as a pattern against the message text; the leading
    # digit of the first numbered line it matches becomes the choice, so a
    # word taken from one of the options works as well as its number
    nawk -v pick="$temp" '$0 ~ pick' ${tempfile}message |\
    nawk '/^[1-3]/{print substr($1, 1, 1)}' | head -1 >! ${tempfile}
    set in = `cat ${tempfile}`
    rm -f ${tempfile}        >& /dev/null
    rm -f ${tempfile}message >& /dev/null

    if("$in" == "") then
	# the reply matched none of the options: nothing is changed
    
    else
	# something was chosen
	if(("$in" == "1")||($?HURRY_UP)) then
	    # ASAP
	    # mark the optional improvement steps as already done
	    echo "Hurrying up..."
	    set SDCORR_OPTIMIZED
	    set BREAKS_SET_UP
	    set RESOLUTION_OK
	    set HURRY_UP
	endif

	if("$in" == "2") then
	    # quit
	    echo "It's all yours! "
	    echo "Don't forget to read the README files. "
	
	    goto Cleanup
	endif

	if("$in" == "3") then
	    # what we planned to do anyway
	    echo "Okay, we'll take it from here."
	    unset HURRY_UP
	endif
    endif
endif
unset FIRSTIME


# fastest possible route to numbers
# With HURRY_UP set, everything is built in one go with make and the script
# then goes to Cleanup without attempting any improvements.
if($?HURRY_UP) then

    # just use the make file
    make | tee -a ${LOG_dir}/make.log
    if(($status)||(! -e "${finalMTZ}")) then
	# something went wrong

	# rough-scaled file exists but the locally-scaled one does not, and
	# the rough-scale-only fallback has not been used yet
	if((-e "${rscaleMTZ}")&&(! -e ${lscaleMTZ})&&(! $?ROUGHSCALE_ONLY)) then
	    # localscaling seems to have failed

	    # user wants numbers now, so just bypass local scaling
	    echo "reverting to one-batch-per-frame scaling in ${rscaleMTZ}."
	    mv ${LOG_dir}/localscale.log ${LOG_dir}/localscale.log.bad
	    set ROUGHSCALE_ONLY
	    #cp ${rscaleMTZ} ${lscaleMTZ}
	    
	    # let's try that again
	    goto Scaling_Scripts
	endif
	goto FixProblem
    endif

    # done with first numbers, exit
    echo "Elves will make no further modifications to improve your scripts."
    echo "Enjoy your data! "
    echo ""
    goto Cleanup
endif

# first, we need the combined raw-data file
# ("make -q" only asks whether the target is up to date)
make -q ${rawMTZ}
if($status) then
    # make the file
    make ${rawMTZ} | tee -a ${LOG_dir}/make.log
    if($status) goto FixProblem
endif

# preemptively re-parse runs with scala's initial scales
# (done once: skipped as soon as INITIAL_JUMPS or FINAL_JUMPS exists)
if((! $?FINAL_JUMPS)&&(! $?INITIAL_JUMPS)) then
    set INITIAL_JUMPS
    # special, short scala run to just get initial intensities
    set lastLOG = "${LOG_dir}/initial_scales.log"
    echo "determining gross, initial scales (see ${lastLOG})"
    scala hklin ${rawMTZ} << EOF-initscale >! ${lastLOG}
# same resolution range
RESOLUTION $hiRES $loRES
# scales only, no B-factor
INTENSities partials
SCALES batch bfactor off
# only calculate initial scales for all batches
INITIAL mean
cycles 0
noscale
output none
final none
EOF-initscale
    if($status) then
	echo "ERROR determining initial scales."
	# too many parameters? 
	# if so, there's no way we can get initial scales
	goto FixProblem
    endif

    # make an xloggraph table in the log file
    # (batch numbers come from the "Run number ... consists of batches"
    # listings and are paired, by position within the run, with the values
    # listed under "Initial scales for run")
    cat ${lastLOG} |\
    nawk 'BEGIN{print "$TABLE : Initial Scales :"; print "$GRAPHS:scale vs batch:N:1, 3: $$";\
                print "batch batch_number scale $$ $$"} \
	/Run number/ && /consists of batches/{run=$3; rot=1;\
		while (NF){ getline; for(i=1;i<=NF;++i){ \
		batch[run "." rot] = $i; ++rot; }}}     \
	/Initial scales for run/{ run = $NF; rot=1; \
		while (NF){ getline; for(i=1;i<=NF;++i){ \
		printf "%5d %6d %s\n", rot, batch[run "." rot], $i; ++rot}}}\
	END{print "$$"}' |\
    cat >! ${tempfile}init_scales
    cat ${tempfile}init_scales >> ${lastLOG}
    rm -f ${tempfile}init_scales >& /dev/null

    
    # use this log as input to the usual discontinuity detector
    set lastLOG = "${LOG_dir}/initial_scales.log"
    goto Discontinuities
endif





ProcessData:

# bring processing up to first all-frame scaling round
# (rebuild ${rscaleMTZ} only if make says it is out of date, then scan the
# resulting log for known failure messages)
make -q ${rscaleMTZ}
if($status) then
    # rscaleMTZ is not up-to-date
    date >> ${LOG_dir}/make.log
    make ${rscaleMTZ} | tee -a ${LOG_dir}/make.log
    if($status) goto FixProblem
    set lastLOG = ${rscaleLOG}

    # check for problems
    # frames without overlaps are handled by the Overlaps section
    grep "no overlaps" ${lastLOG} >& /dev/null
    if(! $status) then
	set GAPS
	goto Overlaps
    endif

    # every other recognized message goes to the FixProblem section
    grep "Error in refscl/dsyev" ${lastLOG} >& /dev/null
    if(! $status) then
	set REFSCL_BUG
	goto FixProblem
    endif

    grep "Negative scale" ${lastLOG} >& /dev/null
    if(! $status) then
	goto FixProblem
    endif

    grep "not converged" ${lastLOG} >& /dev/null
    if(! $status) then
	goto FixProblem
    endif

    # make reported success but the output file is missing
    if(! -e "${rscaleMTZ}") goto FixProblem
endif




###############################################################################
#
#	Improvements
#
###############################################################################
#   This section uses the results of the first scaling run to
#   improve upon scala's input.  The appropriate changes will
#   be made, and the scripts updated.
###############################################################################

# make sure files are there!
# GAPS is recomputed from scratch: it is set only if the current log exists
# and contains "no overlaps"
unset GAPS
if(-e "${lastLOG}") then
    grep "no overlaps" ${lastLOG} >& /dev/null
    if(! $status) then
	set GAPS
    endif
endif

Overlaps:
###############################
# fill in gaps
###############################
# Entered with GAPS set when scala reported frames with "no overlaps".
# Remedies are tried one per pass, each ending in a jump back to the script
# generator:  1) widen the use of partials,  2) switch to smooth scaling,
# 3) double the smoothing window,  4) drop the frames scala named.
if($?GAPS) then
    # there were gaps
    echo "Some frames had no spots in common with any other frames, "
    echo "and, therefore, undefined scale."
    
    # check to see if partials can be used
    # (a bare two-word INTEN... PART... card means nothing more can be added)
    set temp = `nawk '$1 ~ /^INTEN/ && $2 ~ /^PART/ && NF==2' $RULESFILE`
    if("$temp" == "") then
	# increase effective mean-normalized mosaicity
	# Values below 0.27 are first raised in steps of 1.5x, then one more
	# factor of 1.5 is applied.  An empty, zero or negative value can
	# never reach 0.27 by multiplication (the loop would spin forever),
	# so it is started from 0.27 instead.
	set MOSAIC = `echo $MOSAIC | nawk '{m = $1+0; if(m <= 0) m = 0.27; while(m < 0.27){m *= 1.5}; print m * 1.5}'`
	
	# evaluate (as in stats setup)
	# only the last card produced is kept; a MAXWIDTH above 6 is replaced
	# by the bare "INTENSITIES PARTIALS" card
	echo $MOSAIC |\
	nawk '$NF > 0.34{print $NF, 0.577 - 0.226*log($NF-0.333)}' |\
	nawk '$NF < 0.99{printf "INTENSITIES SCALE_PARTIALS %.2f\n", $NF};\
	$NF < 0.7 {printf "INTENSITIES PARTIALS MAXWIDTH %d\n", $1+1.5;}' |\
	nawk '/MAXWIDTH/{if($NF>6){print "INTENSITIES PARTIALS"}else{print}}  ! /MAXWIDTH/' |\
	tail -1 >! ${tempfile}parts
	
	# update RULES file
	nawk '! (/^INTEN/ && /PART/)' $RULESFILE >! ${tempfile}rules
	cat ${tempfile}parts >> ${tempfile}rules
	mv ${tempfile}rules $RULESFILE
	
	echo "changeing INTENS card to:"
	nawk '/^INTENS/' $RULESFILE
	
	# need to re-create scaling scripts
	rm -f ${tempfile}parts >& /dev/null
	goto Scaling_Scripts
    endif

    # no more partials can be added to scaling
    
    # try using the smoothing function? 
    if(("$SCALING" == "batch")&&(! $?TRIED_EARLY_SMOOTH)) then
	# haven't tried smooth scaling yet.
	set TRIED_EARLY_SMOOTH
	
	echo "imposing smooth scales..."
	set SCALING = smooth
	set BFACTOR = smooth
		
	goto Scaling_Scripts
    endif 

    if(($SPACING < 30)&&(! $?TRIED_MORE_SMOOTH)) then
	# did smooth scaling, just not enough?
	set TRIED_MORE_SMOOTH
	
	set temp = `echo $SPACING | nawk '{printf "%d", 2*$1}'`
	# quoted: an empty $temp must compare as an empty string instead of
	# leaving csh with a malformed expression
	if("$temp" != "") then
	    set SPACING = "$temp"
	    
	    echo "increasing smoothing requirement to ${SPACING}$DEG window"
	    goto Scaling_Scripts
	endif
    endif
    
#    # evidently, smoothing didn't work
#    if("$SCALING" != "batch") then
#	echo "switching back to frame-by-frame scales."
#	set SCALING = "batch"
#	set BFACTOR = "batch"
#	set SPACING = 10
#    endif
    
    # Look for "hopeless" batches (no scalable spots)
    # (the last word of each error line is split at "." into run and position)
    cat ${lastLOG} |\
    nawk '/ERROR: no overlaps with batch/{print $NF}' |\
    nawk 'BEGIN{FS="."} NF!=0{print "hopeless", $1, $2}' |\
    cat >! ${tempfile}hopeless
    
    # get input batch number from run definitions file
    # (first batch of the run, field 3 of its "run" line, plus position - 1)
    cat ${tempfile}hopeless $RUNFILE |\
    nawk '/^hopeless /{badrun[$2]=badrun[$2] " " $3-1} \
	  /^run/{if(badrun[$2] != ""){n=split(badrun[$2],bad); \
	         for(i=1;i<=n;++i){print $3+bad[i]}}}' |\
    cat >! ${tempfile}badframes
    rm -f ${tempfile}hopeless
	
    # see if we got anything
    set temp = `nawk 'NF>0' ${tempfile}badframes | wc -l`
    if(($temp > 0)&&(! $?KEEP_ALL_FRAMES)) then
	# we got some "hopeless" frames
	
	# jump ahead to the bad-frame eliminator
	goto BadFrames
    else
	# Can't do anything more with the gaps
	set CANT_FIX_GAPS
	rm -f ${tempfile}badframes
	goto FixProblem
    endif    
endif







FindBadFrames:
###############################
# find bad/outlier frames (based on R factor)
###############################
# Reads the table that starts at the "SIGM0 Imean" header of ${lastLOG} and
# writes the frames to be rejected, one per line, to ${tempfile}badframes.
# The file is removed again when no frame (or an implausibly large number of
# frames) was singled out.
if(! -e ${lastLOG}) then
    # no logfile? what the hell? 
    goto FixProblem
else
    grep "SIGM0 Imean" ${lastLOG} >& /dev/null
    if($status) then
	# table is not here
	goto FixProblem
    endif
    
    # look for frames with Rfactor > 3x mean
    # The Rfactor column is located by the position of its heading; the
    # frame label is the 2nd field of each row that starts with a digit.
    # Plain "sort -n" is used for the one-column list: the old "+1" key
    # form is taken as a file name by current sort implementations, which
    # left the list empty and silently disabled this check.
    cat ${lastLOG} |\
    nawk '/SIGM0 Imean/,NF==0' |\
    nawk '/Rfactor /{ofst=index($0, "Rfactor")} \
       $1 ~ /^[0-9]/{badness[$2]=substr($0, ofst)+0;\
       mean+=badness[$2]; ++n}\
    END{if(n){mean/=n; for(frame in badness){if(badness[frame]+0>3*mean)print frame}}}' |\
    sort -n >! ${tempfile}badframes
    
    # check to see if any frames were singled out
    set badframes = `cat ${tempfile}badframes | wc -l`
    if($badframes > 50) then
	# too many bad frames
	echo "intra-frame statistics are very bad."
	echo "you might have the wrong space group! "
	echo "Elves suggest inspecting your frames, and"
	echo "maybe trying a lower-symmetry space group."
	set badframes = 0
    endif
    if($badframes == 0) then
	# no bad frames, so get rid of this file
	rm -f ${tempfile}badframes >& /dev/null
    endif
endif

BadFrames:
###############################
# exclude bad frames? 
###############################
# Acts on ${tempfile}badframes if some earlier section left one behind:
# the listed frames are reported and cut out of the "run" lines of $RUNFILE.
if(-e "${tempfile}badframes") then

    # allow other procedures to eliminate frames via ${tempfile}badframes
    set badframes = `cat ${tempfile}badframes | wc -l`

    if($badframes != 0) then
	if($badframes == 1) then
	    echo "1 frame appears especially bad:"
	else
	    echo "$badframes frames appear especially bad:"
	endif
	
	# report baddies to user (in familiar context)
	cat ${tempfile}badframes | nawk -f ${SCRIPT_dir}/unadd.awk
	

	# bad frames will be CUMULATIVELY eliminated
	# Each "run" line is trimmed at both ends and split wherever a listed
	# frame falls inside it; runs are renumbered from 1 and every other
	# line of $RUNFILE is passed through.
	cat ${tempfile}badframes $RUNFILE |\
	nawk 'NF==1 && $1+0==$1{Bad[$1]=1} ! /^run / && NF!=1{print} \
 	    /^run /{start=$3; end=$5; \
	      while(Bad[start]){++start}; while(Bad[end]){--end}; \
	      if(start <= end){++run;\
		printf "run %5d %5d to ", run, start;\
		for(b=start;b<=end;++b){\
		    if(Bad[b]){printf "%5d\n", b-1; ++run;\
		    while(Bad[b]){++b};\
		    printf "run %5d %5d to ", run, b;}};\
		printf "%5d\n", end;}}' |\
	cat >! ${tempfile}newruns
	rm -f ${tempfile}badframes >& /dev/null
	
	# update new run list
	# (not when the user asked to keep all frames, nor once FINAL_JUMPS is set)
	if((! $?KEEP_ALL_FRAMES)&&(! $?FINAL_JUMPS)) then
#	if(! $?KEEP_ALL_FRAMES) then
	    echo "updating run definitions in $RUNFILE ... "
	    mv ${tempfile}newruns $RUNFILE
	    
	    # since there WERE bad frames, $RUNFILE has changed, so we need to
	    # remake ${rscaleMTZ}
	    goto Scaling_Scripts
	else
	    rm -f ${tempfile}newruns >& /dev/null
	    echo "Please review the original images, and data reduction runs"
	    echo "for these frames.  IF there is nothing wrong with them..."
	    goto UnFixable
	endif
    endif
    
    rm -f ${tempfile}badframes >& /dev/null
endif






Discontinuities:
###############################
# look for discontinuities of scale
###############################
# Reads per-batch scales from ${lastLOG}, turns them into batch-to-batch
# "jumps", and proposes new run boundaries wherever a jump exceeds
# 1 + 3 sigma.  Depending on the outcome the script goes on to RoughScale,
# LocalScaling, FixProblem, or simply falls through.
if(! -e ${lastLOG}) then
    goto FixProblem
else
    # get scale table from the log
    # (4th and 6th words of the all-numeric rows between the
    # " Scales v rotation range" heading and the next "Total" line)
    cat ${lastLOG} |\
    nawk '/ Scales v rotation range/,/Total/' |\
    nawk 'NF>2 && ! /[a-z]/{print $4, $6}' |\
    cat >! ${tempfile}scales
    
    set temp = `cat ${tempfile}scales | wc -l`
#    if(($temp == 0)&&("$SCALING" == "batch")) then
    if($temp == 0) then
	# try initial scales?
	# (batch numbers from the "consists of batches" listings, paired by
	# position with the values under "Initial scales for run")
	cat ${lastLOG} |\
	nawk '/Run number/ && /consists of batches/{run=$3; rot=1;\
		while (NF){ getline; for(i=1;i<=NF;++i){ \
       		  batch[run "." rot] = $i; ++rot; }}}     \
		/Initial scales for run/{ run = $NF; rot=1; \
		while (NF){ getline; for(i=1;i<=NF;++i){ \
       		  print batch[run "." rot], $i; ++rot}}}' |\
   	cat >! ${tempfile}scales
    endif
    
    # convert scales to "jumps"
    # (ratio of each scale to the one on the previous line, inverted when
    # below 1 so that a jump is always >= 1; reported as 1 when either
    # scale is zero or missing)
    cat ${tempfile}scales |\
    nawk '{if(S+0!=0 && $2+0!=0){jump=$2/S; if(jump<1)jump=1/jump;\
           print $1, jump}else{print $1, 1}; S=$2}' |\
    cat >! ${tempfile}jumps
    rm -f ${tempfile}scales >& /dev/null
    
    # compute sigma of all scale jumps
    # (root-mean-square deviation from 1; nothing is printed when the sum
    # of squares is zero or there were no jumps)
    set sigma = `cat ${tempfile}jumps | nawk '{++n; sum += ($2-1)^2} END{if(n*sum > 0) print sqrt(sum/n)}'`
   
    # check to see if this worked
    if("$sigma" == "") then
	# no scale information
	goto FixProblem
    endif
 
    # re-seal old breaks, but don't resurrect rejected frames
    # Existing "run" lines are discarded and rebuilt from the "+" lines:
    # one run per stretch of consecutive batches that appear in the jump
    # list, so batches absent from the list stay excluded.  The jump list
    # itself is passed along for the next step.
    cat ${tempfile}jumps $RUNFILE |\
    nawk ' ! /^run /{print}  NF==2 && $1+0==$1 {Used[$1]=1} \
      $3=="+"{add=$2; start=$4+add; end=$6+add; \
          while((! Used[start])&&(start < $6+add)){++start};\
          while((!   Used[end])&&(end   > $4+add)){--end};  \
	  if(start <= end){++run; \
	  printf "run %5d %5d to ", run, start;\
	  for(b=start;b<=end;++b){\
		    if(! Used[b]){printf "%5d\n", b-1; ++run;\
		    while(! Used[b]){++b};\
		    printf "run %5d %5d to ", run, b;}};\
	  printf "%5d\n", end};}' |\
    cat >! ${tempfile}sealedruns
    
    # now break-up new runs at scale jumps > 3 sigma 
    # (a run is split just before every batch whose jump exceeds
    # 1 + 3*sigma; the two-field jump lines are dropped from the output)
    cat ${tempfile}sealedruns |\
    nawk -v sigma=$sigma 'NF==2 && $2 > 1+3*sigma{Break[$1]=1} ! /^run/ && NF != 2{print} \
      /^run /{++run; printf "run %5d %5d to ", run, $3; \
	   for(b=$3;b<$5;++b){if(Break[b+1]){++run;\
	       printf "%5d\nrun %5d %5d to ", b, run, b+1;}};\
	printf "%5d\n", $5;}' |\
    cat >! ${tempfile}newruns
	
    
    # clean up a bit
    rm -f ${tempfile}sealedruns >& /dev/null
    rm -f ${tempfile}jumps >& /dev/null
    
    # make sure we haven't thrown out entire wavelengths
    set newruns = `nawk '/^run/' ${tempfile}newruns | wc -l`
    set oldruns = `nawk '/^run/' $RUNFILE | wc -l`
    set temp = `nawk '$2=="wavelength"' ${tempfile}newruns | wc -l`
    if($temp < $#wavelengths) then
	# no runs found
	echo "unable to interpret scales from ${lastLOG}."
	rm -f ${tempfile}newruns >& /dev/null

	# don't change anything
	echo "$RUNFILE will remain unchanged."
	set newruns = $oldruns
	#goto FixProblem
    endif

    # and not too many runs
    if($newruns > 25) then
	echo "too many scale discontinuities."
	echo "$RUNFILE will be unchanged."
	
	rm -f ${tempfile}newruns >& /dev/null
	set newruns = $oldruns
    endif
    

    # only adjust run file if run definition has actually changed
    # (that is, only when the number of runs went up)
    if("$oldruns" < "$newruns") then
	# better way to avoid oscillation?
	#set FINAL_JUMPS
	
	# rebuild run list
	echo "Found $newruns contiguous groups of scales."
	echo "updating run definitions to:"
	
	# display new run definitions for the user
	# (batch numbers are converted back to frame numbers by subtracting
	# the offset of the preceding "+" line)
	cat ${tempfile}newruns | \
	nawk '$2=="wavelength"{label = $3} \
	$3=="+"{file=$NF; add=$2;} /^run /{print $3-add, $5-add, file, label}' |\
	nawk '{++n; first[n]=$1; last[n]=$2; file[n]=$3; label[n]=$4; \
	       if(w1 < length(first[n]))  w1=length(first[n]); \
	       if(w2 < length(last[n]))  w2=length(last[n]); \
	       if(w3 < length(file[n]))  w3=length(file[n]);}\
	    END {printf "%"w1"s%"w2"s   %-"w3"s  as wavelength\n", "", "frames", "from file";\
	    for(i=1;i<=n;++i){ \
	    printf "%"w1"d to %"w2"d  %-"w3"s  %s\n", first[i], last[i], file[i], label[i];}}'

	# once FINAL_JUMPS is set the new runs are shown but not installed
	if($?FINAL_JUMPS) then
	    echo "However, $RUNFILE will remain unchanged."
	    rm -f ${tempfile}newruns >& /dev/null
	    goto LocalScaling
	endif
	
	mv ${tempfile}newruns $RUNFILE
	
	# make sure we're NOT doing smooth scales next
	if("$SCALING" == "smooth") echo "switching back to framewise scales."
	set SCALING = "batch"
	set BFACTOR = "smooth"
	
	# need to edit scaling scripts again
	goto RoughScale
    endif
    
    # no mid-run discontinuities, so do all-smooth scaling next
    if("$SCALING" == "batch") then
	echo "switching to smooth scales."
	rm -f ${tempfile}newruns >& /dev/null
	set SCALING = "smooth"
	set BFACTOR = "smooth"
	
	# need to edit scaling scripts again
	goto RoughScale
    endif
    
    # didn't need this
    rm -f ${tempfile}newruns >& /dev/null
endif




LocalScaling:
# finish up all scaling (but don't do merging yet)
if($?ROUGHSCALE_ONLY) then
    # mimic localscaling
    # (the locally-scaled file name becomes a symbolic link to the
    # rough-scaled file, unless both names are already the same)
    echo "skipping localscaling"
    if("${rscaleMTZ}" != "${lscaleMTZ}") ln -sf `basename ${rscaleMTZ}` ${lscaleMTZ}
    goto OptimizeSDCORR
endif
set lastLOG = ${lscaleLOG}
make ${lscaleMTZ} | tee -a ${LOG_dir}/make.log
if(($status)||(! -e "${lscaleMTZ}")) goto FixProblem

# check for problems
# frames without overlaps go back to the Overlaps section
grep "no overlaps" ${lastLOG} >& /dev/null
if(! $status) then
    set GAPS
    goto Overlaps
endif

# the remaining recognized messages go to the FixProblem section
grep "Negative scale" ${lastLOG} >& /dev/null
if(! $status) then
    goto FixProblem
endif

grep "not converged" ${lastLOG} >& /dev/null
if(! $status) then
    goto FixProblem
endif





OptimizeSDCORR:
###############################
# optimize SDCORRections
###############################
# Done once (guarded by SDCORR_OPTIMIZED): a cut-down copy of merge.com is
# handed to the autoscala script, and the SDCORR card it settles on replaces
# the one in $RULESFILE.
if(! $?SDCORR_OPTIMIZED) then
    
    # starting value: 2nd word of the current SDCORR card
    set sdfac = `nawk '/^SDCORR/{print $2}' $RULESFILE`
    # get scala-refined sdfac value
    # (mean of (column 2 + column 5)/2 over the rows of the final SDcorrection
    # table whose 2nd column is not 1.0000; accepted only if between 0.8 and 5)
    if(-e "$lastLOG") then
	cat ${lastLOG} |\
	nawk '/Final assessment of SDcorrection multipliers/{getline;getline;getline;getline;\
	      while(NF>2){if($2 != 1.0000){++n; print; sum+=($2+$5)/2}; getline}; if(n) printf "%.2f\n", sum/n}' |\
	cat >! ${tempfile}
	set temp = `tail -1 ${tempfile} | nawk '$1 > 0.8 && $1 < 5{print $1}'`
	rm -f ${tempfile}
	if("$temp" != "") set sdfac = "$temp"
    endif

    # blurb about what autoscala is:
    echo "Optimizing SDCORR command in scala... (see ${SCRIPT_dir}/README for details)"
    
    # run autoscala on modified scala script (merging reference set)
    # The copy of merge.com is changed in three ways: the "set wave" line
    # selects the reference wavelength, the SDCORR card starts from $sdfac,
    # and the script removes its 3rd-word file and exits where the
    # "truncate hkl" line would have run.
    cat ${SCRIPT_dir}/merge.com |\
    nawk -v wave=$wave_reference '/^set wave/{$NF = wave} {print}' |\
    nawk -v sdfac=$sdfac 'toupper($0) ~ /^SDCORR/{print "SDCORR", sdfac, $3, $4} toupper($0) !~ /^SDCORR/' |\
    nawk '/^truncate hkl/{print "rm -f " $3 " >& /dev/null"; print "exit"} {print}' |\
    cat >! ${SCRIPT_dir}/test.com
    
#    # run autoscala on modified scala script (merging everything)
#    cat ${SCRIPT_dir}/merge.com |\
#    nawk -v wave="all" '/^set wave/{$NF = wave} {print}' |\
#    nawk -v sdfac=$sdfac 'toupper($0) ~ /^SDCORR/{print "SDCORR", sdfac, $3, $4} toupper($0) !~ /^SDCORR/' |\
#    nawk '/^truncate hkl/{print "rm -f " $3 " >& /dev/null"; print "exit"} {print}' |\
#    cat >! ${SCRIPT_dir}/test.com
    
    ${SCRIPT_dir}/autoscala summary=no ${SCRIPT_dir}/test.com | tee ${LOG_dir}/autoscala.log
    rm -f ${SCRIPT_dir}/test.com_test >& /dev/null
    rm -f ${SCRIPT_dir}/test.com >& /dev/null
    
    # the result is looked for in test.com_best
    grep SDCORR ${SCRIPT_dir}/test.com_best >& /dev/null
    if(! $status) then
	# retrieve our new SDCORR value
	cat ${SCRIPT_dir}/test.com_best |\
	nawk 'toupper($0) ~ /^SDCORR/{print toupper($0)}' >! ${tempfile}
	
	# inform user of what we are doing
	echo "Updating error corrections to:"
	cat ${tempfile}
	
	# update "rules" file
	# (new SDCORR card first, then every other card)
	nawk '! /^SDCORR/' $RULESFILE >> ${tempfile}
	mv ${tempfile} $RULESFILE
    else
	# test.com_best has no SDCORR line
	echo "Unable to determine new SDCORRections! "
	echo "will still use:"
	grep "SDCORR" $RULESFILE
    endif
    
    rm -f ${SCRIPT_dir}/test.com_best >& /dev/null
    
    # only do this once
    set SDCORR_OPTIMIZED
    goto RoughScale
endif


CutResolution:
###############################
# evaluate outer resolution limit (once SDCORR has been optimized)
###############################
# Done once (guarded by RESOLUTION_OK) and only when ${lastLOG} exists.
if((! $?RESOLUTION_OK)&&(-e "${lastLOG}")) then
    # From every table whose header has "Dmin(A)" as its 3rd word, take the
    # 7 characters under the "Mn" heading on the 11th line below the header;
    # the smallest of these values is used as the outer-bin signal.
    cat ${lastLOG} |\
    nawk '$3 == "Dmin(A)"{skip=12;idx=index($0,"Mn");}\
     {--skip; if(skip == 0){print substr($0,idx,7)}}' |\
     sort -n >! ${tempfile}signal
    set signal = `head -1 ${tempfile}signal`
    rm -f ${tempfile}signal
    
    # should have been detected earlier, but
    if("$signal" == "") then
	goto FixProblem
    endif
    
    echo "Your outer resolution bin has I/sigma = $signal"
    # compare as integers: the value is multiplied by 10, so the thresholds
    # 40 and 15 below stand for 4.0 and 1.5
    set signal = `echo "$signal" | nawk '{printf "%d", $1*10}'`
    if("$signal" > 40) then
	echo "You could probably measure to higher resolution! "
	echo ""
	# extend resolution upward? (risk oscillation?)
    endif
    if("$signal" < 15) then
	# smallest 3rd-column value among the numeric table rows whose "Mn"
	# column is above 2
	cat ${lastLOG} |\
	nawk '$3 == "Dmin(A)",/Overall/' |\
	nawk '! /[^0-9. -]/{print} /Dmin/{print "idx",index($0,"Mn")}' | \
	nawk '/^idx/{idx=$2;next} NF>3 && (substr($0,idx,7)+0 > 2) {print $3}' |\
	sort -n >! ${tempfile}reso
	set newRES = `head -1 ${tempfile}reso`
	rm -f ${tempfile}reso
	# keep it only if it is numerically larger than the current limit
	set newRES = `echo "$newRES $hiRES" | nawk 'NF==2 && $1+0>$2+0{print $1}'`
	
	if("$newRES" == "") then
	    # bad log file?
	    # fall back to 1.1 times the current limit
	    set newRES = `echo $hiRES | nawk '{print $1 * 1.1}'`
	endif
	
	echo "This means there are no real spots at $hiRES A."
	echo "Elves suggest $newRES A as a cutoff."
	
	# ask permission?
	#echo "What resolution cutoff do you want? [$temp]"
	
	# a limit marked with USER_hiRES is left alone
	if(! $?USER_hiRES) then
	    # reset high resolution limit
	    set hiRES = "$newRES"
	    unset SDCORR_OPTIMIZED
	
	    # update scaling scripts
	    goto RoughScale
	endif
    endif
    set RESOLUTION_OK
endif




# check convergence?
# (a missing log, or a log containing "not converged", goes to FixProblem)
if(! -e "${lastLOG}") goto FixProblem
set badLOG = `grep -l "not converged" ${lastLOG} |& nawk 'NF==1' | head -1`
if(-e "$badLOG") goto FixProblem




# what else is there to fiddle with? 




# finish building all targets
# now that we can be confident truncate won't quit
# ("make -i" keeps going when individual commands fail)
make -i | tee -a ${LOG_dir}/make.log
if(($status)||(! -e "${finalMTZ}")) then
    goto FixProblem
endif



# edit SOLVE files?


#goto RunSOLVE


#goto RunwARP


goto Cleanup












exit
FixProblem:
#
#	Fix Known Problems with Scaler Elves scripts
#
# Entry point for every failure.  When FIX_PROBLEMS is set, this first
# works out which log is the most recent (lastLOG), which script the
# Makefile associates with it (badSCRIPT), and bails out to UnFixable for
# the log types that have no known remedy.  When FIX_PROBLEMS is not set,
# the whole section is skipped and control falls through to UnFixable.
if($?FIX_PROBLEMS) then
    # CRAP!

    # figure out where we went wrong
    # ("|&" merges error messages into the pipe; NF==1 then keeps only
    #  single-word lines, i.e. the file names)
    set temp = `ls -1 ${mergeLOG}* ${extractLOG}* |& nawk 'NF==1'`
    # newest log first
    ls -1t $sortLOG $refLOG $rscaleLOG $lscaleLOG $finalLOG $temp |&\
    nawk 'NF==1' >! ${tempfile}allLOG

    # allLOGs: leading part of that list, cut off once the summed length
    # of the names reaches MAXLINE.  lastLOG: the newest log.
    set allLOGs = `cat ${tempfile}allLOG |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`    
    set lastLOG = `head -1 ${tempfile}allLOG`
    rm -f ${tempfile}allLOG >& /dev/null
    
    if("$lastLOG" == "") then
	# some kind of problem on this system?
	# fall back to whatever is newest in the log directory
	set lastLOG = "${LOG_dir}/"`ls -1t ${LOG_dir} | head -1`
    endif
    set badLOG = "$lastLOG"
    
    echo "Procedure failed in: $lastLOG"
    # second word of the Makefile line that mentions both this log and
    # the script directory
    set badSCRIPT = `grep "$lastLOG" Makefile | grep "${SCRIPT_dir}" | nawk '{print $2}'`
    
    # detect un-fixable problems
    # NOTE(review): after the fallback above lastLOG always contains at
    # least "${LOG_dir}/", so this test looks unreachable -- confirm.
    if("$lastLOG" == "") then
	# some kind of problem on this system?
	goto UnFixable
    endif
    
    if("$lastLOG" =~ ${extractLOG}*) then
	# no fixes for these! (that we know of)
	goto UnFixable
    endif
    if("$lastLOG" =~ ${finalLOG}*) then
	# no fixes for this! (that we know of)
	goto UnFixable
    endif
    
    
    
    # look for problems we know how to solve
    #
    # Each check greps the collected logs for one tell-tale message.
    # A fix is applied only once: the variable it changes doubles as
    # the "already tried" marker.

    # new mosflm, old sortmtz
    set badLOG = `grep -l "You must either scale down the data or change VRSET" ${allLOGs} |& nawk 'NF==1' | head -1`
    if("$USE_VRSET" != "") then
	if(-e "$badLOG") then
	    echo "new mosflm and old sortmtz ... trying to use VRSET"
	    set USE_VRSET = ""
	    unset cycles
	    goto SortScript
	endif
    endif

    # rare bug in scala 4.x
    set badLOG = `grep -l "Error in refscl/dsyev" ${allLOGs} |& nawk 'NF==1' | head -1`
    if(! $?NO_REFERENCE) then
	if(-e "$badLOG") then
	    echo "trouble scaling to reference set ... throwing it out"
	    set NO_REFERENCE
	    unset cycles
	    goto Scaling_Scripts
	endif
    endif

    # new "feature" in 5.0.x
    set badLOG = `grep -l "batches are not assigned to datasets" ${allLOGs} |& nawk 'NF==1' | head -1`
    if(! $?RENAME_DATASETS) then
	if(-e "$badLOG") then
	    echo "scala is complaining about dataset info in headers ... rewriting it"
	    set RENAME_DATASETS
	    unset cycles
	    goto SortScript
	endif
    endif

    # no fixes for sorting (that we know of)
    if("$sortLOG" == "$lastLOG") goto UnFixable
 
    # convergence (oscillating? )
    # If the last log contains "not converged": grant 50 more cycles, and
    # once the count passes 110 also add the "filter" word to EXTRA_ARGS,
    # then regenerate the scaling scripts.
    set badLOG = `grep -l "not converged" ${lastLOG} |& nawk 'NF==1' | head -1`
    if(-e "$badLOG") then

	echo -n "scaling didn't reach convergence, "
	
	# look through EXTRA_ARGS exactly as scaling scripts did
	# FILTER is "#" until a "filter" word is seen; cycles starts at CYCLES
	set FILTER = "#"
	set cycles = $CYCLES
	set temp = ""
	foreach arg ( $EXTRA_ARGS )
	    # may override default filtering
	    if("$arg" == "filter") then
		set FILTER = ""
	    endif
	    
	    # raw numbers become cycle counts
	    # (any word that starts with 1-9 and ends with a digit)
	    if(("$arg" =~ [1-9]*)&&("$arg" =~ *[0-9])) then
		set cycles = "$arg"
		set arg = ""
	    endif
	    
	    # rebuild list without cycle count
	    set temp = "$temp $arg"
	end
	
	# increase number of cycles given before
	@ cycles = ( $cycles + 50 )
	echo "will try $cycles scaling rounds this time."
	set EXTRA_ARGS = "$temp $cycles"
	
	if(("$cycles" > 110)&&("$FILTER" == "#")) then
	    # maybe try damping?
	    set EXTRA_ARGS = "$EXTRA_ARGS filter"
	    set FILTER = ""
	endif
	
	# report what's going on
	if("$FILTER" == "") echo "eigenvalue filter is active."
    
	# the new count now lives in EXTRA_ARGS only
	unset cycles
	goto Scaling_Scripts
    endif


    # lack-of-overlap problems
    # (tried only while CANT_FIX_GAPS is unset)
    set badLOG = `grep -l "no overlaps" "${lastLOG}" | head -1`
    if(("$badLOG" != "")&&(! $?CANT_FIX_GAPS)) then
	# back up this "problem" run
	cp $badLOG ${badLOG}.bad >& /dev/null
	set badLOG = "${badLOG}.bad"
	set GAPS
	goto Overlaps
    endif
    
    # truncate crash
    # The last log says the data extend beyond the useful resolution
    # limit: pick a lower-resolution cutoff and redo the scaling, or only
    # the merging when RESOLUTION_OK is already set.
    set badLOG = `grep -l "Data beyond useful resolution limit" "$lastLOG" | head -1`
    if("$badLOG" != "") then
	echo "$hiRES A bin has no real spots in it."
    
        # back it up, for user examination
	cp $badLOG ${badLOG}.bad >& /dev/null
        set badLOG = "${badLOG}.bad"

	# newRES = 6th field of the last 10-field line without lower-case
	# letters between "Wilson Plot" and "TRUNCATE" in the saved log
	cat ${badLOG} |\
	nawk '/Wilson Plot/,/TRUNCATE/' |\
	nawk 'NF==10 && ! /[a-z]/{print $6}' |\
	tail -1 >! ${tempfile}res
	set newRES = `cat ${tempfile}res`
	rm -f ${tempfile}res
	
	# update high-res limit
	# temp = 100*(newRES - hiRES) as an integer; it is also below 5
	# when newRES came back empty
	set temp = `echo "$hiRES $newRES" | nawk '{printf "%d", 100*($2-$1)}'`
	if("$temp" < 5) then
	    # slight decrease in resolution cutoff
	    set hiRES = `echo "$hiRES" | nawk '{printf "%.2f", $1 * 1.05}'`
	else
	    # use the reso from last good bin in truncate
	    set hiRES = "$newRES"
	endif
	
	# use ordinary resolution cutoff adjuster?
	#unset RESOLUTION_OK
	#goto CutResolution

	echo "Elves suggest $hiRES A as a cutoff."
	
	if($?RESOLUTION_OK) goto Merger
	goto RoughScale
    endif
    
    # check for any further merging problems (like what? )
    if("$lastLOG" =~ ${mergeLOG}*) then
	# no fixes for these! (that weren't considered above)
	goto UnFixable
    endif
    
    
    # now we know the problem is with a scaling script
    
    # negative scale?
    # Remedies are tried in a fixed order and each one jumps away to redo
    # the scaling: smooth B-factors, damping, longer damping, a wider
    # smoothing window, TIE restraints, and finally bad-frame rejection.
    # If none applies, control falls out of this block.
    set badLOG = `grep -l "Negative scale" ${allLOGs} | head -1`
    if("$badLOG" != "") then
   
	echo "Scale went negative in $badLOG"
	
	# back it up, for user review
        cp $badLOG ${badLOG}.bad >& /dev/null
        #set badLOG = "${badLOG}.bad"
	
	# smooth B-factors?
	# (only when both are still per-batch and the failing log is not
	#  the local-scaling log)
	if(("$BFACTOR" == "batch")&&("$SCALING" == "batch")&&("$badLOG" != "$lscaleLOG")) then
	    # try smoothing B-factor only
	    set SCALING = batch
	    set BFACTOR = smooth
	    
	    echo "constraining B-factors to vary smoothly."
	    goto RoughScale
	endif
	
	# dampen shifts?
	# egrep status is non-zero when neither the rules file nor the
	# failing script has a line starting with FILT or DAMP
	egrep -i "^FILT|^DAMP" $RULESFILE $badSCRIPT >& /dev/null
	if($status) then
	    # no damping yet, let's do it! 
	    echo "Enabling damping function."
	    echo "Scaling will be slower, but more stable."
	    # the appended card carries CYCLES/2, printed as an integer
	    echo "$CYCLES" | nawk '{printf "DAMP 0.1 %d\n", $1/2}' |\
	    cat >> $RULESFILE
	    
	    # re-generate all scaling scripts
	    if("$badLOG" == "$refLOG") goto Scaling_Scripts
	    goto RoughScale
	else
	    # check if scaling failed once damping was released?
	    # lastcycle: number of lines containing "Cycle" in the log.
	    # lastDAMPcycle: last field of the last line whose third word
	    # is DAMP (0 if there is none).
	    set lastcycle = `grep "Cycle" $badLOG | wc -l`
	    set lastDAMPcycle = `nawk '$3=="DAMP"{damp=$NF} END{print damp+0}' $badLOG`
	    if(($lastcycle > $lastDAMPcycle)&& ($CYCLES < 201)) then
		# just do more cycles of damping
		@ lastDAMPcycle = ( $lastcycle + 50 )
		@ CYCLES = ( $lastDAMPcycle + 50 )
		
		# add this to the rules file
		echo "DAMP 0.1 $lastDAMPcycle" >> $RULESFILE
		
		echo "Damping for $lastDAMPcycle cycles this time..."
		if("$badLOG" == "$refLOG") goto Scaling_Scripts
		goto RoughScale
	    endif
	endif
	
	# increase smoothing window? (if we are using smooth scales)
	# (one attempt only: TRIED_MORE_SMOOTH marks it as used)
	if(( ("$badLOG" == "$lscaleLOG")||("$SCALING" == "smooth") )&&($SPACING < 30)&&(! $?TRIED_MORE_SMOOTH)) then
	    # try doubling the spacing
	    set temp = `echo $SPACING | nawk '{printf "%d", 2*$1}'`
	    if($temp > 1) then
		set SPACING = "$temp"
		set TRIED_MORE_SMOOTH
		
		echo "increasing the smooth scaling requirement to a ${SPACING}$DEG window."
		goto RoughScale
	    endif
	endif
	
	# add "tie" cards
	# (skipped when a line starting with TIE is already present)
	egrep -i "^TIE" $RULESFILE $badSCRIPT >& /dev/null
	if($status) then
	    # no "ties" between scales, might as well try it.
	    echo "forcing neighboring scales to be within 0.1 sigmas."
	    echo "TIE ROTATION 0.1" >> $RULESFILE
	    if("$badLOG" == "$lscaleLOG") echo "TIE DETECTOR 0.1" >> $RULESFILE
	    
	    # re-generate all scaling scripts
	    if("$badLOG" == "$refLOG") goto Scaling_Scripts
	    goto RoughScale
	endif
	
	
	# last resort: throw out suspect frames
	# (third-from-last word of every "Negative scale factor" line)
	cat ${badLOG} |\
	nawk '/Negative scale factor/{print $(NF-2)}' |\
	cat >! ${tempfile}badframes
	
	# if we got some, go to bad-frame eliminator
	# (unless KEEP_ALL_FRAMES is set)
	set badframes = `nawk 'NF>0' ${tempfile}badframes | wc -l`
	if(($badframes > 0)&&(! $?KEEP_ALL_FRAMES)) then
	    unset NO_BAD_FRAMES
	    goto BadFrames
	endif
	
    endif
    
    
    
    # too many parameters?
    # Reduce the number of refined parameters step by step: smooth
    # B-factors first, then a doubled smoothing window (while the doubled
    # value stays below 30), then smooth scales.  Every path through this
    # block ends in a jump; when nothing is left to try it is UnFixable.
    set badLOG = `grep -l "Too many parameters" ${allLOGs} | head -1`
    if("$badLOG" != "") then
   	echo ""
	echo "Too many free parameters for this version of scala! "
	if("$BFACTOR" == "batch") then
	    echo "trying smooth B factors to reduce free parameters."
	    echo ""
	    set BFACTOR = "smooth"
	    goto Scaling_Scripts
	endif
	# temp = twice the current spacing, as an integer
	set temp = `echo $SPACING | nawk '{printf "%d", 2*$1}'`
	if(($temp > 1)&&($temp < 30)) then
	    set SPACING = "$temp"
	    set TRIED_MORE_SMOOTH
		
	    echo "increasing the smoothing window to ${SPACING}$DEG."
	    echo ""
	    goto Scaling_Scripts
	endif
	if("$SCALING" == "batch") then
	    echo "trying smooth scale factors to reduce free parameters."
	    echo ""
	    set SCALING = "smooth"
	    goto Scaling_Scripts
	endif
	echo ""
	echo "Elves cannot compensate for this problem."
	echo "You must either reduce the amount of data (frames) in this run, "
	echo "or ask your sysadmin to recompile scala with increased limits."
	echo ""
	goto UnFixable
    endif


    
    



    # BLIND fixes:
    # Nothing recognizable turned up in the logs, so vary the smoothing
    # window, switch scales and B-factors to "smooth", and try again.

    # increase spacing?  (one time only, and only below 30)
    if($SPACING < 30) then
	if(! $?TRIED_MORE_SMOOTH) then
	    # try doubling the spacing
	    set temp = `echo $SPACING | nawk '{printf "%d", 2*$1}'`
	    if($temp != "") then
		set TRIED_MORE_SMOOTH
		set SPACING = "$temp"
		set SCALING = smooth
		set BFACTOR = smooth

		echo "increasing the smooth scaling requirement to a ${SPACING}$DEG window."
		goto RoughScale
	    endif
	endif
    endif

    # reduce spacing?  (while it is still above 3)
    if($SPACING > 3) then
	# try cutting it to 2/3 of its value
	set temp = `echo $SPACING | nawk '{printf "%d", 2*$1/3}'`
	if($temp > 1) then
	    set SPACING = "$temp"
	    set SCALING = smooth
	    set BFACTOR = smooth

	    echo "relaxing the smooth scaling requirement to a ${SPACING}$DEG window."
	    goto RoughScale
	endif
    endif

    
    
    # give up on localscaling
    # Last fallback: if the failure was in the local-scaling log and none
    # of the fixes above applied, stop local scaling altogether and let
    # the rough-scaled MTZ stand in for its output (one time only,
    # guarded by ROUGHSCALE_ONLY).
    #
    # The test is made against lastLOG, the log the failure was traced
    # to.  badLOG cannot be used here: every search above that found a
    # match has already jumped away, so badLOG is always empty by now
    # and a comparison with it could never succeed.
    if(("$lastLOG" == "$lscaleLOG")&&(! $?ROUGHSCALE_ONLY)) then
	# localscaling has been failing, and we don't see why
	set ROUGHSCALE_ONLY
	
	echo "giving up on localscaling."
	#echo "copying ${rscaleMTZ} to ${lscaleMTZ}"

        # "fake" localscaling routine
	set lscaleMTZ = $rscaleMTZ
        #cp ${rscaleMTZ} ${lscaleMTZ}
	
	goto RoughScale
    endif
endif

    
# place some magic code here? 
    
    
UnFixable:
# Dead end for failures the Elves cannot repair.  Shows the error text
# found near the end of the last log, tells the user which documentation
# to read and which script to edit by hand, then quits with status 9.

# display error messages from the log
# (only when the log is a real file: with an empty name tail would sit
#  waiting on standard input)
if(-f "$lastLOG") then
    # from the last 300 lines, drop lines whose second word is ":" and
    # print each line mentioning "error" together with the line after it
    tail -300 "$lastLOG" |\
    nawk 'NF>1 && $2 != ":"' |\
    nawk 'tolower($0) ~ /error/{for(i=1;i<3;++i){print; getline}}'
endif

# get the name of the offending script
# (skipped when no log name is known: an empty grep pattern would match
#  every line of the Makefile)
set badSCRIPT = ""
if("$lastLOG" != "") then
    set badSCRIPT = `grep "$lastLOG" Makefile | grep "${SCRIPT_dir}" | nawk '{print $2}'`
endif

echo ""
echo "Elves are not yet trained for this! "
echo "You had better start reading the scala manual: "
if($?CDOC) then
    echo "${CDOC}/scala.doc"
endif
if($?CHTML) then
    echo "${CHTML}/scala.html"
endif
echo ""
echo "Please examine $lastLOG and see if you can figure out what to do."
echo "Then edit $badSCRIPT"
echo "then type: make"
echo ""
exit 9

# never reached: the exit above always ends the run first
goto Generate













exit
Generate:
################################################################################
#
#	First, deploy useful utilities for Elves and Humans
#
################################################################################




FreeRer:
################################################################################

#######                                 ######
#        #####   ######  ######         #     #
#        #    #  #       #              #     #
#####    #    #  #####   #####   #####  ######
#        #####   #       #              #   #
#        #   #   #       #              #    #
#        #    #  ######  ######         #     #

################################################################################
#
#	create Elves version of uniqueify (with inheritance)
#
################################################################################
set SCRIPT = $SCRIPT_dir/FreeRer.com
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#  Automatically generated script for setting up consistent FREE R flags
#
#   FreeRer.com
#
#  Unlike "uniqueify", FreeRer.com can "inherit" free R flags from another file 
#  (mtz or X-plor)
#
#
# set this to wherever your awk program is
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set tempfile = \$CCP4_SCR/FreeRer\$\$

# defaults
set unfree = ""
set flagfile = ""
set outfile = FreeRed.mtz
set XPLORfile = XPLOR.cv
set FRAC = 0.05


# process command-line input
#   NN%        free-R fraction given as a percentage
#   0.NN       free-R fraction given as a decimal
#   first existing *.mtz        file that will receive the flags
#   later *.mtz with FreeR_flag, or any other file containing TEST
#                               source to inherit the flags from
foreach arg ( \$* )
    # user may specify free-R fraction
    if("\$arg" =~ [1-9]*%) set FRAC = \`echo "\$arg" | nawk '{print (\$1 + 0)/100}'\`
    # a decimal replaces the default only if it evaluates to a positive
    # number; anything else leaves FRAC as it was
    if("\$arg" =~ 0.*) then
	set temp = \`echo "\$arg" | nawk '(\$1+0)>0 {print \$1 + 0}'\`
	if("\$temp" != "") set FRAC = "\$temp"
    endif

    # look for MTZ files
    if(("\$arg" =~ *.mtz)&&(-e "\$arg")) then
	if(! -e "\$unfree") then
	    # file to receive free-R flags is first MTZ encountered
	    set unfree = "\$arg"	    
	else
	    # make sure that potential free-R source contains free-R flags
	    mtzdump HKLIN "\$arg" << EOF-dump >! \${tempfile}
HEAD
go
EOF-dump
	    grep "FreeR_flag" \${tempfile} >& /dev/null
	    if(! \$status) set flagfile = "\$arg"
	endif
	rm -f \${tempfile} >& /dev/null
    else
	# free-R source can also be an X-plor hkl file
	if(-e "\$arg") then
	    grep "TEST" "\$arg" >& /dev/null
	    if(! \$status) set flagfile = "\$arg"
	endif
    endif
end

if(! -e "\$unfree") then
    cat << EOF
usage: \$0 mtzfile.mtz [free-R source] [fraction[%]]

Free R flags can be inherited from "free-R source" (mtz or X-plor)
or a new set can be defined as "fraction" of the data in mtzfile.mtz
and the resulting file will be "polished" to fill in missing flags.

EOF
    exit 1
endif

# print out what we are going to do
if(-e "\$flagfile") then
    set temp = "\${flagfile}'s"
else
    set temp = \`echo \$FRAC | nawk '{print 100*\$1"%"}'\`
endif
echo "\\n\\nadding \$temp FreeR_flag to \$unfree in output files: \$outfile and \$XPLORfile"
echo ""
echo ""
# get variables from the input MTZ
mtzdump HKLIN \$unfree << EOF-dump >! \${tempfile}
HEAD
go
EOF-dump
if(\$status) goto bad
set hires = \` nawk '/Resolution Range/{getline; getline; print \$6}' \${tempfile} \`
set CELL = \` nawk '/Cell Dimensions/ {getline; getline; print}' \${tempfile} \`
set SGnum = \` nawk '/Space group/{print \$NF+0}' \${tempfile} \`
set SG = \` nawk -F "[\\047]" '/Space group/{print \$2}' \${tempfile} \`
set SG = \` nawk -v num=\$SGnum '\$1==num && NF>5{print \$4}' \${CLIBD}/symop.lib \`

grep FreeR_flag \${tempfile} >& /dev/null
if(\$status) goto flags_removed
################################################################################
#
# if input file has pre-existing FreeR_flag, I assume you don't want it anymore
# otherwise, you should just use the COMPLETE option in a run of "freerflags"
# or use the same file as the freer source
#
remove_flags:
# purge old Free R flags from \$unfree file
mtzutils hklin \$unfree hklout \${tempfile}.unfree.mtz << eof-purge
EXCLUDE FreeR_flag
eof-purge
if(\$status) goto bad
set unfree = \${tempfile}.unfree.mtz
echo "FreeR_flag removed from \$1"
flags_removed:

# if flag file has already been given, use it instead
if(-e "\$flagfile") then
    mtzdump hklin \$flagfile << EOF >&! \${tempfile}.dump2
    HEAD
    go
EOF
    grep FreeR_flag \${tempfile}.dump2  >& /dev/null
    if(! \$status) then
	# suggested file is a good mtz, so just use it
	goto add_flags
    else
	grep INDE \$flagfile >& /dev/null
	if(! \$status) then
#	    echo not done yet...
	    # this is an X-plor file.  We need to import it
	    cat  \$flagfile |\\
	    nawk '{print toupper(\$0)}' |\\
	    nawk '{for(i=1;i<NF;++i){ \\
	           if(\$i ~ /INDE/){printf "%4d %4d %4d ", \$(i+1), \$(i+2), \$(i+3)}\\
	           if(\$i ~ /TEST/){printf "%4d\\n", (\$(i+1)+1)%2}\\
		   }}' |\\
	    cat >! \${tempfile}.import
	    
	    # import the flags into an MTZ
	    f2mtz HKLIN \${tempfile}.import HKLOUT \${tempfile}.mtz << EOF-import
TITLE FREE-R flags Imported from \$flagfile
CELL \$CELL
SYMM \$SG
LABOUT H K L FreeR_flag
CTYPOUT H H H I
EOF-import
	    if(\$status) goto bad
	    if(! \$?debug) rm -f \${tempfile}.import
	    sortmtz HKLOUT \${tempfile}.sorted.mtz << EOF-sort
H K L
\${tempfile}.mtz
EOF-sort
	    if(\$status) goto bad
	    if(! \$?debug) rm -f \${tempfile}.mtz
	    # reduce to CCP4 asymmetric unit
	    reindex HKLIN \${tempfile}.sorted.mtz HKLOUT \${tempfile}.imported.mtz << EOF-reindex
EOF-reindex
	    if(\$status) goto bad
	    set flagfile = \${tempfile}.imported.mtz
	    goto add_flags
	else
	    # can't use this file
	    echo "ERROR: \$flagfile is no good!!! "
	endif
    endif
endif


make_flags:
################################################################################
# use unique to generate all possible HKLs
# if you are ever so fortunate to extend your data to 1.5A
# this FREE-R set will still be appropriate! 
#
# if you want your free-R to be a different %-age, change FREERFAC
#
unique HKLOUT \${tempfile}.unique.mtz << EOF-unique
TITLE  Unique data for \$SG
LABOUT  F=FP SIGF=SIGFP
# get RESOL, SYMM and CELL
RESO 1.5
SYMM \$SGnum
CELL \$CELL
EOF-unique
if(\$status) goto bad
#
freerflag HKLIN \${tempfile}.unique.mtz HKLOUT freeR_flag.mtz << EOF-FreeR
FREERFAC \$FRAC
END
EOF-FreeR
if(\$status) goto bad
set flagfile = freeR_flag.mtz

# the file freeRflags.mtz now contains the Free-R flags


add_flags:
################################################################################
#
#	Combine these FREE R flags with the merged data
mtzutils        \\
HKLIN1 \$unfree \\
HKLIN2 \$flagfile \\
HKLOUT \${tempfile}.freed.mtz \\
<< EOF-lastcad

RESOLUTION 1000 \$hires

INCLUDE 1 ALL
INCLUDE 2 FreeR_flag

END
EOF-lastcad
if(\$status) goto bad
if(\$status) exit 9

# polish flags here (that is, fill in holes)
freerflag HKLIN \${tempfile}.freed.mtz HKLOUT \$outfile << EOF-polish
COMPLETE FREE=FreeR_flag
END
EOF-polish
if(\$status) goto bad
echo ""
echo ""
echo "\$outfile is now identical to \$1 with Free-R flags added" \`if(-e \$flagfile) echo "from \$flagfile"\`

################################################################################
#
#	list them in X-PLOR format as well
#

# output new (polished) flags from output file
sortmtz HKLOUT \${tempfile}.sorted.mtz << EOF-sort
L K H
\$outfile
EOF-sort
if(\$status) goto bad
mtz2various hklin \${tempfile}.sorted.mtz hklout \${tempfile}.cv << EOF
OUTPUT XPLOR
MISS 0.0
LABIN FP=FreeR_flag SIGFP=FreeR_flag FREE=FreeR_flag
END
EOF
if(\$status) goto bad
nawk '/TEST= 1/{print substr(\$0, 6, 13)}' \${tempfile}.cv >! \${tempfile}_1.cv

# transform X-plor output into something X-plor will read without problems
set NREF = \`wc \${tempfile}.cv | nawk '{print \$1 +1}'\`

echo " NREFlections= \$NREF"                                         >! \$XPLORfile
echo " ANOMalous=FALSE"                                             >> \$XPLORfile
echo " DECLare NAME=TEST   DOMAin=RECIprocal   TYPE=INTE END"       >> \$XPLORfile
nawk '{print substr(\$0, 1, 18) substr(\$0, 47) }' \${tempfile}.cv     >> \$XPLORfile
if(\$status) goto bad



#########################################
# output flags from original flag file
sortmtz HKLOUT \${tempfile}.sorted.mtz << EOF-sort
L K H
\$flagfile
EOF-sort
if(\$status) goto bad
mtz2various hklin \${tempfile}.sorted.mtz hklout \${tempfile}.cv << EOF
OUTPUT XPLOR
MISS 0.0
LABIN FP=FreeR_flag SIGFP=FreeR_flag FREE=FreeR_flag
END
EOF
if(\$status) goto bad
nawk '/TEST= 1/{print substr(\$0, 6, 13)}' \${tempfile}.cv >! \${tempfile}_0.cv

# find differences between new and old free R flags
# (lines present only in the polished list)
diff \${tempfile}_0.cv \${tempfile}_1.cv | grep '> ' >! \${tempfile}.diff

# count them once, so that the summary lines and the none-or-list
# decision below are all based on the same number
set temp = \`cat \${tempfile}.diff | wc -l\`

echo ""
echo ""
echo "\$temp new Free HKLs assigned during polishing:"
if("\$temp" == 0) then
    echo none
else
    cat \${tempfile}.diff
endif
echo "above \$temp new Free HKLs assigned during polishing."
echo ""
echo ""


set temp = \`echo \$FRAC | nawk '{print 100*\$0}'\`
if(-e "\${tempfile}.unique.mtz") echo "\$flagfile was created with \${temp}% free-R flags."
if("\$flagfile" == "\${tempfile}.imported.mtz") set flagfile = \$2
echo "\$outfile is now identical to \$1 with Free-R flags added from the file \$flagfile"
echo "\$XPLORfile contains these Free-R flags in X-PLOR format"


if(\$?debug) exit
rm -f \${tempfile}* >& /dev/null


exit


bad:
echo "ACK! \07"
exit 9

EOF-script
chmod a+x $SCRIPT











set SCRIPT = $SCRIPT_dir/mtz2various.com
cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#	general script for converting mtz to another format
#
##############################################################################
# set up awk
alias nawk $nawk
nawk 'BEGIN{print 1; exit}' >& /dev/null
if(\$status) alias nawk awk

# defaults
set mtzfile  = ${finalMTZ}
set tempfile = \${CCP4_SCR}/mtz2various_temp\$\$

# output F, phase, etc. (automatic by default)
set hiRES    = ""
set F        = Fin
set SIGF     = SIGFin
set DANO     = ""
set SIGD     = ""
set PHI      = PHIDM
set FOM      = FOMDM
set FREE     = FreeR_flag

# output files
set outfile   = outfile.cif
# supported: xplor cns tnt cif shelx fin phs
set format    = "CIF"

if("\$1" == "") goto Help
goto Setup
#  Procedure (at bottom) to read command-line args
#  mtz, Fs, or resolution
Help:
cat << EOF

usage: \$0 mtzfile.mtz [F] [PHI] outfile.fmt

where:
mtzfile.mtz  - an mtz file you want to convert
[F] [PHI]    - are the dataset names in mtzfile.mtz you want to extract
outfile.fmt  - is the output file name
fmt          - implies format:
		cif -> CIF
		hkl -> shelx
		tnt -> TNT
		fin -> XtalView
		phs -> XtalView
		fobs-> XPLOR
		cv  -> XPLOR
		cns -> CNS

EOF
exit 9
ReturnFromSetup:

# construct a LABIN card for the dump
set label = ""
if("\$F" != "")    set label = "\$label FP=\$F"
if("\$SIGF" != "") set label = "\$label SIGFP=\$SIGF"
if("\$DANO" != "") set label = "\$label DP=\$DANO"
if("\$SIGD" != "") set label = "\$label SIGDP=\$SIGD"
if("\$PHI" != "")  set label = "\$label PHIB=\$PHI"
if("\$FOM" != "")  set label = "\$label FOM=\$FOM"
if("\$FREE" != "") set label = "\$label FREE=\$FREE"

################################################################################
# extract columns of interest
mtz2various HKLIN \$mtzfile HKLOUT \${tempfile}.hkl << EOF-dump 
#>> /dev/null
RESOLUTION 1000 \$hiRES
OUTPUT \$format
LABIN \$label
END
EOF-dump
if(\$status) exit

# TNT output: free-R flagged reflections go to a separate .free file,
# everything else to the main output file, both sorted numerically on
# fields 2, 3 and 4.  The sort keys use the POSIX -k form, since the
# old +pos -pos form is rejected by current sort programs.
if("\$format" == "TNT") then
    if("\$FREE" != "") then
	# extract FREE-R set
	cat \${tempfile}.hkl |\\
	nawk '\$NF=="FREE"{print substr(\$0,1,index(\$0,"FREE")-1)}' |\\
	sort -k2,2n -k3,3n -k4,4n |\\
	cat >! \${outfile}.free
    endif
    # make sure the output is properly sorted
    cat \${tempfile}.hkl |\\
    nawk '\$NF!="FREE"{print}' |\\
    sort -k2,2n -k3,3n -k4,4n |\\
    cat >! \${outfile}

    echo "TNT version of \$F \$SIGF \$PHI \$FOM in \$mtzfile is now in:"
    echo "\$outfile"
    echo "\${outfile}.free contains the Free-R flagged HKLs"
    rm -f \${tempfile}.hkl >& /dev/null
    exit
endif

# check for supported/unsupported output
if("\$format" != 'USER *') then
    # use whatever mtz2various did
    mv \${tempfile}.hkl \$outfile

    echo "\$format version of \$F \$SIGF \$DANO \$SIGD \$PHI \$FOM from \$mtzfile is at:"
    echo "\$outfile"
    exit
endif

# must have been an unsupported format (fin, phs)
if(("\$ext" == "fin")||("\$ext" == "phs")) then
    echo "name" >! \${outfile}.CRYSTAL
    echo "cell \$CELL" >> \${outfile}.CRYSTAL
    cat \${CLIBD}/symop.lib |\\
    nawk -v SG=\$SG '\$4==SG{print "spgr", tolower(\$4), \$1, \$2, \$3, tolower(\$6);\\
	getline; getline; printf "symm x,y,z"; \\
	while(NF==1){printf "; %s", tolower(\$1); getline};\\
	print "."; exit}' |\\
    cat >> \${outfile}.CRYSTAL
endif

if("\$ext" == "phs") then
    # XtalView phs file
    # should have been output as H K L F PHI FOM
    cat \${tempfile}.hkl |\\
    nawk '{printf "%4d %4d %4d %12.4f %7.4f %7.2f\\n", \$1, \$2, \$3, \$4, \$6, \$5}' |\\
    cat >! \$outfile

    echo "XtalView version of \$F \$PHI \$FOM from \$mtzfile is at:"
    echo "\$outfile"
    echo "\${outfile}.CRYSTAL"
    rm -f \${tempfile}.hkl >& /dev/null
    exit
endif


if("\$ext" == "fin") then
    # XtalView fin file
    # should have been output as H K L F SIGF D SIGD
    cat \${tempfile}.hkl |\\
    nawk 'NF>=7{F1=\$4+(\$6/2); F2=\$4-(\$6/2); SIG1=SIG2=\$5*1.4142;}\\
	  NF<7 || \$NF+0==0{F1=\$4; SIG1=\$5; F2=0; SIG2=9999}\\
	  NF>3{printf "%4d %4d %4d %12.4f %12.4f %12.4f %12.4f\\n", \$1, \$2, \$3, F1, SIG1, F2, SIG2}' |\\
    cat >! \$outfile

    echo "XtalView version of \$F \$SIGF \$DANO \$SIGD from \$mtzfile is at:"
    echo "\$outfile"
    echo "\${outfile}.CRYSTAL"
    rm -f \${tempfile}.hkl >& /dev/null
    exit
endif


####################################################################
exit
####################################################################



Setup:
# scan the command line for files
foreach arg ( \$* )
    if( "\$arg" =~ *.mtz ) then
	if(! -e "\$arg") then
	    echo "WARNING: \$arg does not exist! "
	    continue
	endif
	set mtzfile  = "\$arg"
	continue
    endif
    if("\$arg" =~ *.cif) then
        set outfile  = "\$arg"
        set format   = "CIF data_\$arg"
        continue
    endif
    if("\$arg" =~ *.hkl) then
	set outfile  = "\$arg"
	set format   = "SHELX"
	continue
    endif
    if("\$arg" =~ *.tnt) then
        set outfile  = "\$arg"
        set format   = "TNT"
        continue
    endif
    if("\$arg" =~ *.cns) then
        set outfile  = "\$arg"
        set format   = "CNS"
        continue
    endif
    if(("\$arg" =~ *.fobs)||("\$arg" =~ *.cv)) then
	set outfile  = "\$arg"
	set format   = "XPLOR"
	continue
    endif
    if("\$arg" =~ *.fin) then
        set outfile  = "\$arg"
        set format   = 'FIN'
        continue
    endif
    if("\$arg" =~ *.phs) then
        set outfile  = "\$arg"
        set format   = 'PHS'
        continue
    endif
    if( "\$arg" =~ [0-9]* ) then
	set temp = \`echo "\$arg" | nawk '\$1+0>0.1{print \$1+0}'\`
	if("\$temp" != "") set hiRES = "\$temp"
    endif
end

#get variables from mtz file
echo "go" | mtzdump hklin \$mtzfile | tee \${tempfile}mtzhead |\\
nawk '/OVERALL FILE STATISTICS/,/No. of reflections used/' |\\
nawk 'NF>10 && \$(NF-1) ~ /[FQPWADI]/' |\\
cat >! \${tempfile}mtzdmp

# set misc header values
set CELL = \`nawk '/Cell Dimensions/{getline; getline; print}' \${tempfile}mtzhead\`
set SG   = \`nawk '/Space group/{print \$5}' \${tempfile}mtzhead\`
set SGnum = \` nawk '/Space group/{print \$NF+0}' \${tempfile}mtzhead \`
set SG = \` nawk -F "[\\047]" '/Space group/{print \$2}' \${tempfile}mtzhead \`
set SG = \` nawk -v num=\$SGnum '\$1==num && NF>5{print \$4}' \${CLIBD}/symop.lib \`
set mtzRES = \`nawk '/Resolution Range/{getline;getline;print \$6}' \${tempfile}mtzhead\`
rm -f \${tempfile}mtzhead >& /dev/null

# use completeness, or F/sigF to pick default F
# Each output line is: label, label of the following column, resolution,
# completeness, mean/mean ratio.  Lines are ranked by field 3 ascending,
# then field 4 descending, then field 5 onward descending.
# (POSIX -k sort keys; the old +pos -pos form is rejected by current
#  sort programs)
cat \${tempfile}mtzdmp |\\
nawk '\$(NF-1) == "F"{F=\$NF; meanF=\$8; reso=\$(NF-2); comp=substr(\$0,32)+0; \\
      getline; S=\$NF; if(\$8) meanF /= \$8; print F, S, reso, comp, meanF;}' |\\
sort -k3,3n -k4,4nr -k5nr >! \${tempfile}F

# same for D/sigD for anomalous diffs
cat \${tempfile}mtzdmp |\\
nawk '\$(NF-1) == "D"{D=\$NF; meanD=\$8; reso=\$(NF-2); comp=substr(\$0,32)+0; \\
      getline; S=\$NF; if(\$8) meanD /= \$8; print D, S, reso, comp, meanD;}' |\\
sort -k3,3n -k4,4nr -k5nr >! \${tempfile}D


# and extract all dataset types/labels
cat \${tempfile}mtzdmp |\\
nawk 'NF>2{print \$(NF-1), \$NF, " "}' |\\
cat >! \${tempfile}cards

#clean up
rm -f \${tempfile}mtzdmp

# pick F with best resolution, or <F>/<sigma(F)>
set F    = \`head -1 \${tempfile}F\`
if(\$#F > 2) then
    set SIGF = \$F[2]
    set F    = \$F[1]
endif

# see if default phase is available in this file
grep "P \$PHI" \${tempfile}cards >& /dev/null
if(\$status) then
    # pick most recently-added phase
    set temp = \`nawk '/^P/{print \$2}' \${tempfile}cards  | tail -1\`
    if("\$temp" != "") set PHI = "\$temp"
endif
# see if default FOM is available in this file
grep "W \$FOM" \${tempfile}cards >& /dev/null
if(\$status) then
    # pick most recently-added FOM
    set temp = \`nawk '/^W/{print \$2}' \${tempfile}cards | tail -1\`
    if("\$temp" != "") then
	set FOM = "\$temp"
    else
	# there are no FOMs in this mtz file
	set FOM = ""
    endif
endif

# see if user specified an F, Phase, or FOM
set last = "F"
foreach arg ( \$* )
    set temp = \`grep " \$arg " \${tempfile}cards\`
    if("\$temp" =~ F*) then
	set F = "\${arg}"
	set last = "F"
	# assign most likely sigma too
	set temp = \`nawk -v arg="\$arg" '\$1==arg{print \$2}' \${tempfile}F\`
	if(\$#temp == 1) set SIGF = "\$temp"
	continue
    endif
    if("\$temp" =~ D*) then
        set DANO = "\${arg}"
	set last = "D"
        # assign most likely sigma too
        set temp = \`nawk -v arg="\$arg" '\$1==arg{print \$2}' \${tempfile}D\`
        if(\$#temp == 1) set SIGD = "\$temp"
        continue
    endif
    if("\$temp" =~ Q*) then
	if("\$last" == "F") set SIGF = "\${arg}"
	if("\$last" == "D") set SIGD = "\${arg}"
    endif
    if("\$temp" =~ P*) set PHI  = "\${arg}"
    if("\$temp" =~ W*) set FOM  = "\${arg}"


    # detect particular out-of-context format specifiers
    set arg = \`echo "\$arg" | nawk '{print tolower(\$1)}'\`
    if("\$arg" == "shelx") set format = SHELX
    if("\$arg" == "tnt")   set format = TNT
    if("\$arg" == "cif")   set format = CIF
    if("\$arg" == "xplor") set format = XPLOR
    if("\$arg" == "cns")   set format = CNS
    if("\$arg" == "xtalview") then
        if("\$PHI" == "") then
            set format = "FIN"
        else
            set format = "PHS"
        endif
    endif
end

# check for DANO, if we might need one
if(("\$format" == "FIN")&&("\$DANO" == "")) then
    # grab pattern from name
    set temp = \`echo \$F | nawk '{print substr(\$1,2)}'\`
    set DANO = \`nawk -v patt=\$temp '\$1~ patt"\$" {print \$1; exit}' \${tempfile}D\`
    # get likely sigma too
    set SIGD = \`nawk -v arg="\$DANO" '\$1==arg{print \$2}' \${tempfile}D\`
endif

# now check and see if the sigma is really there
grep "Q \$SIGF" \${tempfile}cards >& /dev/null
if(\$status) then
    # no sigma availale
    set SIGF = ""
endif
grep "Q \$SIGD" \${tempfile}cards >& /dev/null
if(\$status) then
    # no sigma availale 
    set SIGD = ""
endif
grep "P \$PHI" \${tempfile}cards >& /dev/null
if(\$status) then
    # no phase availale
    set PHI = ""
endif
grep "W \$FOM" \${tempfile}cards >& /dev/null
if(\$status) then
    # no weight availale
    set FOM = ""
endif
grep " \$FREE" \${tempfile}cards >& /dev/null
if(\$status) then
    # no weight availale
    set FREE = ""
endif


rm -f \${tempfile}cards \${tempfile}F \${tempfile}D >& /dev/null

# set resolution (if none previously given)
if("\$hiRES" == "") set hiRES = "\$mtzRES"

# store filename extension/format
set ext = \`echo \$outfile | nawk 'BEGIN{FS="."} {print \$NF}'\`
if("\$format" == "CIF") then
    # needs an extra word
    set format = "CIF data_dump"
endif
if("\$format" == "FIN") then
    # not supported by mtz2various
    set format = 'USER *'
    set ext = fin
    set PHI = ""
    set FOM = ""
    set FREE = ""
endif
if("\$format" == "PHS") then
    # not supported by mtz2various
    set format = 'USER *'
    set ext = phs
    set SIGF = ""
    set DANO = ""
    set SIGD = ""
    set FREE = ""
endif

goto ReturnFromSetup

####################
# future:
# better handling of shelx
# multiple datasets/mtzs?

EOF-script
chmod a+x $SCRIPT
















###############################################################################
#
#	best FH calculator
#
###############################################################################
# Generate the bestFH.com helper script.  The here-document is unquoted, so
# $nawk and ${finalMTZ} are substituted now, while every backslash-escaped
# dollar sign is left for the generated script to expand when it is run.
set SCRIPT = $SCRIPT_dir/bestFH.com
cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#
#	bestFH.com 				-James Holton 8-2-04
#
#	calculate a "best" estimate of F for the heavy atoms
#	alone (a la B. W. Matthews 1966 Acta Cryst 20, 230-239)
#	by combining ALL anomalous and isomorphous differences.  
#	(as many as you want)
#
#	FH should give cleaner Pattersons, difference Fouriers,
#	and direct methods.
#
#	Do NOT combine differences from data sets expected to have
#	different heavy atom locations.  That would be silly.
#
#
# set this to wherever your awk program is
# (the alias target was filled in when this script was generated;
#  if it cannot run a trivial program, fall back to plain awk)
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

# default file names; Setup replaces mtzfile and outfile with
# any *.mtz names found on the command line
set mtzfile   = "${finalMTZ}"		# MAD data set
set outfile   = "./FH.mtz"		# contains FH, SIGFH
set shelxfile = "./fh.hkl"		# same thing, shelx format
set fourfile  = "./FH_Four.map"		# Phased Fourier of FH (if phase is available)
set pattfile  = "./FH_Patt.map"		# Combined Patterson map
set wpattfile = "./wFH_Patt.map"	# Combined, weighted Patterson map
set logfile   = "./bestFH.log"		# all the CCP4 logs

# common prefix of every scratch file (the shell process id is appended)
set tempfile = \${CCP4_SCR}/bestFH_temp\$\$


# initialize internal variables
set loRES = 1000
set hiRES = ""
set order = ""		# "order" of increasing dispersive signal data sets

set DATA_cutoff = 1	# sigma cutoff      (not used)
set MAX_dano 		# upper Dano cutoff (not used)
set MAX_diso		# upper Diso cutoff (not used)
set scaling = scale	# apply one scale to each difference data set
#set scaling = isotropic # use a B-factor too (wise? )

# no arguments at all: print the usage text and quit
if(\$#argv == 0) goto Help

# this procedure (re)sets most of the above variables
# from either the provided files, or the command line
goto Setup

Help:
# Print the usage text, remove the scratch list files and quit with status 2.
# The formula quoted in the text matches what combine_diso_dano reports:
# FH = sqrt( Diso^2 + (k/2)^2 * Dano^2 ), with k printed as twice the
# Diso/Dano scale found by scaleit.
cat << EOF

usage: \$0 alldata.mtz

where:
alldata.mtz	- contains all the Fs and DANOs you want to combine

\`basename \$0\` will calculate an (unscaled) estimate of FH, the scattering factor
of the heavy metal alone, by combining any and all anomalous and isomorphous
(dispersive) differences made available to it.  The procedure is derived
from Matthews et. al. 1966 Acta Cryst 20, 230.

FH is better than Dano or Diso alone, both because the averaging tends
to give better signal/noise, and because considering BOTH isomorphous
and anomalous simultaneously reduced the systematic errors arising
from cross-terms in difference intensity data.  FH usually gives 
cleaner Pattersons and difference Fouriers, as well as improved 
performace of direct methods programs.

Procedure:
The isomorphous difference data sets are computed from the provided Fs,
(assigned a sign), scaled together, and a sigma-weighed mean isomorphous 
difference is computed.  A similar procedure is applied to the anomalous 
differences.

The average Diso and Dano data sets are then scaled to each other, and
combined for the final estimate of FH = sqrt(Diso^2 + (k/2)^2 * Dano^2).

REMEMBER: Do NOT combine differences from data sets expected to have
different heavy atom locations.  That would be silly.  If you don't
know why, you should read Matthews et. al. 1966 Acta Cryst 20, 230.

EOF

# scratch lists written by Setup
rm -f \${tempfile}Fs >& /dev/null
rm -f \${tempfile}Ds >& /dev/null
rm -f \${tempfile}Ps >& /dev/null
rm -f \${tempfile}Fpairs >& /dev/null
# the automatic ordering step can also bail out to Help with these two
# scratch files still on disk
rm -f \${tempfile}order >& /dev/null
rm -f \${tempfile}diso_dano >& /dev/null
exit 2
#
#   This procedure (at the bottom of the script) does the following
#   1) scan the command line for the mtz file
#   2) set the CELL, SG, and other variables
#   3) generate dataset name lists: \${tempfile}Fs and \${tempfile}Ds
#   Setup jumps back to the label below when it is done.
Return_from_Setup:






################################################################
#   initial report on intended program flow
################################################################
# start the logfile
# (truncate it, then record the command line that was used)
echo "" >! \$logfile
echo "\$0 \$*" >> \$logfile

if(-e "\$mtzfile") then
    echo "calculating FH from data in \$mtzfile" | tee -a \$logfile
endif

echo "" | tee -a \$logfile
echo "resolution \$loRES \$hiRES" | tee -a \$logfile
# get a "better" resolution limit for scaling?
# (for now the same low/high limits go onto every RESOLUTION card below)
set scaleRES = "\$loRES \$hiRES"

# count how many datasets we have
# (each list holds one label/sigma-label pair per line)
set Fs = \`cat \${tempfile}Fs | wc -l\`
set Ds = \`cat \${tempfile}Ds | wc -l\`

# trivial assignments of dispersive difference order
# (2 datasets -> doesn't matter)
# (1 dataset  -> unusable)
if(\$Fs < 3) set order = \`nawk '{print \$1}' \${tempfile}Fs\`
if(\$Fs < 2) set order = ""

# if user doesn't care, pick Diso ordering automatically
#
# Every pair of F columns is run through scaleit; the mean isomorphous
# (and anomalous) difference of each pair is collected in a five-column
# table (F1 F2 DANO diso dano).  The F2 of the pair with the largest
# isomorphous difference becomes the reference, and the other data sets
# are ordered by increasing difference from it.
# Sort keys use the -k form; the old +N form is not accepted by every sort.
if(("\$order" == "")&&(\$Fs > 2)) then
    
    echo -n "Evaluating difference data "
    echo -n "" >! \${tempfile}diso_dano
    
    # make a list of essential pairs
    cat \${tempfile}Fs |\\
    nawk '{++n; F[n]=\$1} \\
          END{for(i=1;i<=n;++i){for(j=i;j<=n;++j){if(i!=j){\\
              print F[i] " - " F[j];}}}}' |\\
    cat >! \${tempfile}Fpairs
    
    foreach pair ( \`nawk '{print NR}' \${tempfile}Fpairs\` )
	# retrieve the pair  
	set F1 = \`nawk -v pair=\$pair 'NR==pair{print \$1}' \${tempfile}Fpairs\`
	set F2 = \`nawk -v pair=\$pair 'NR==pair{print \$3}' \${tempfile}Fpairs\`
	
	# and get the sigmas too
	set SIGF1 = \`nawk -v F=\$F1 '\$1==F{print \$2}' \${tempfile}Fs\`
	set SIGF2 = \`nawk -v F=\$F2 '\$1==F{print \$2}' \${tempfile}Fs\`
	
	# also, get magnitudes of anomalous diffs
	# (the anomalous columns are visited round-robin, one per pair)
	if(\$Ds != 0) then
	    @ i = ( ( \$pair  % \$Ds ) + 1 )
	    set DANO    = \`nawk -v n=\$i 'NR==n{print \$1}' \${tempfile}Ds\`
	    set SIGDANO = \`nawk -v n=\$i 'NR==n{print \$2}' \${tempfile}Ds\`

	    set DANOcards = "DPH1=\$DANO SIGDPH1=\$SIGDANO"
	else
	    # no anomalous columns: DANO must still be defined for the table
	    # line written below, and the placeholder keeps the five-column
	    # layout that the later sort and awk steps rely on
	    set DANO = "none"
	    set DANOcards = ""
	endif
    
	# make some ordinary scaleit cards
	cat << EOF-scaleitin >! \${tempfile}scaleit.in
RESOLUTION \$scaleRES
refine \$scaling
weight
LABIN FP=\$F1 SIGFP=\$SIGF1 -
      FPH1=\$F2 SIGFPH1=\$SIGF2 \$DANOcards
END
EOF-scaleitin
	
	# entertainment
	echo -n "."
	
	# put the labels in the log file
	echo -n "\$F1 \$F2 \$DANO " >> \${tempfile}diso_dano
	
	# run scaleit to get Diso and Dano
	cat \${tempfile}scaleit.in |\\
	scaleit HKLIN \$mtzfile HKLOUT /dev/null |\\
	nawk '/Sc_kraut SCALE/{iso=index(\$0,"diso")-4; ano=index(\$0,"<ano>")+3}\\
	/THE TOTALS/{print substr(\$0,iso)+0, substr(\$0,ano)+0}' |\\
	cat >> \${tempfile}diso_dano
	
	# \${tempfile}diso_dano has format: F1 F2 DANOF3  <Diso> <Dano>
    end
    rm -f \${tempfile}Fpairs >& /dev/null
    rm -f \${tempfile}scaleit.in >& /dev/null

    # get the single largest isomorphous difference
    set order = \`sort -n -k 4 \${tempfile}diso_dano | tail -1 | nawk '{print \$2}'\`
    if("\$order" == "") then
	echo ""
	echo "no isomorphous differences"
	echo "estimating FH requires isomorphous/dispersive and anomalous differences! "
	echo "Therefore: \$mtzfile needs to contain at least two columns of Fs"
	echo "sorry"
	goto Clean_up
    endif

    # now order the remaining datasets with increasing distance from this "native"
    cat \${tempfile}diso_dano |\\
    nawk -v ref=\$order 'BEGIN{print ref, 0, "order"} \\
       \$1==ref{print \$2, \$4} \$2==ref{print \$1, \$4}' |\\
    sort -n -k 2 >! \${tempfile}order
    # \${tempfile}order has format: F diso(from Fref)
    
    # store the new order in a variable
    set order = \`nawk '{print \$1}' \${tempfile}order\`

    # make sure nothing bad happened (this isn't exactly ergodic! )
    set Fs = \`cat \${tempfile}Fs | wc -l\`
    if(\$#order != \$Fs) then
	# something has gone horribly wrong
	echo "ERROR: unable to determine the best order of"
	echo "       isomorphous/dispersive differences"
	echo "sorry! "
	echo ""
	goto Help
    endif
    
    # reorder the Dano list too (doesn't really matter)
    cat \${tempfile}diso_dano \${tempfile}Ds |\\
    nawk 'NF>2{dano[\$3]=\$5} \\
          NF==2{print \$0, dano[\$1]}' |\\
    sort -nru -k 3 |\\
    nawk '{print \$1, \$2}' >! \${tempfile}
    mv \${tempfile} \${tempfile}Ds >& /dev/null

    # clean up
    rm -f \${tempfile}scaleit.in >& /dev/null
    rm -f \${tempfile}diso_dano >& /dev/null
endif

# whatever its source, put the f' order in a file
# (one label per line, each tagged with the word order so that the next
#  awk can tell these lines apart from the label/sigma lines of the Fs list)
echo "\$order" |\\
nawk 'BEGIN{RS=" "} NF==1{++i; print \$1, "order"}' |\\
cat >! \${tempfile}order

# re-order the Fs list
# (only labels named in the order file survive, in that order,
#  each followed by its sigma label)
cat \${tempfile}Fs \${tempfile}order |\\
nawk '\$NF!="order"{sig[\$1]=\$2} \\
      \$NF=="order"{print \$1, sig[\$1]}' |\\
cat >! \${tempfile}
mv \${tempfile} \${tempfile}Fs >& /dev/null
set Fs = \`cat \${tempfile}Fs | wc -l\`



# print out final ordering results:
echo ""
echo -n " f' order: "
cat \${tempfile}Fs | nawk '{printf "%s ", \$1} END{print ""}'

echo -n " f"\\"" order: "
cat \${tempfile}Ds | nawk '{printf "%s ", \$1} END{print ""}'

# clean up
rm -f \${tempfile}order >& /dev/null


# jump ahead if there are no anomalous datasets (why bother?)
if("\$Ds" == 0) then
    # make sure this doesn't exist (this is a signal later on)
    rm -f \${tempfile}dano.mtz >& /dev/null
    goto calculate_iso
endif

# no need to weigh anomalous datasets if there is only one of them
if("\$Ds" == 1) then
    # all we need to do is rename the dataset
    # (temp holds the anomalous label and its sigma label)
    set temp = \`cat \${tempfile}Ds\`
    # better than crashing, (I guess)
    if(\$#temp != 2) set temp = ( \$temp \$temp )

    # cad copies the pair into its own file under the labels Dano and
    # SIGDano; the CTYPO card asks for column types F and Q
    echo "extracting \$temp[1] as Dano"
    cad hklin1 \$mtzfile hklout \${tempfile}dano.mtz << EOF-cad >> \$logfile
    LABIN FILE 1 E1=\$temp[1] E2=\$temp[2]
    CTYPO FILE 1 E1=F        E2=Q
    LABOU FILE 1 E1=Dano     E2=SIGDano
EOF-cad

    # go on to calculate isomorphous differences
    goto calculate_iso
endif

weigh_ano:
# Put all anomalous difference columns on a common scale with scaleit
# (first column is the reference, the rest are treated as derivatives,
# at most six per run) and report an effective weight of 1/scale^2 for
# each column.  The weight table is sorted with a -k key; the old +N
# key form is not accepted by every sort.
echo ""
echo "weighting anomalous differences"

# extract the anomalous differences, and treat them as Fs
cat \${tempfile}Ds |\\
nawk '{printf "%s %s ", \$1, \$2} END{print ""}' |\\
nawk 'BEGIN{printf "LABIN FILE 1 ";} \\
{for(i=1;i<=NF;++i){printf "E%d=%s ", i, \$i}; printf "\\nCTYPIN FILE 1 "; \\
 for(i=1;i<=NF;i+=2){printf "E%d=F E%d=Q ", i, i+1}; print ""}' |\\
cad HKLIN1 \$mtzfile HKLOUT \${tempfile}danos.mtz >> \$logfile

# put anomalous diffs on the same scale
set i = 1
echo -n "" >! \${tempfile}scaleit.log
while( \$i <= \$Ds )
    cat << EOF-scaleitin >! \${tempfile}scaleit.in
RESOLUTION \$scaleRES
#refine scale
refine \$scaling
weight
EOF-scaleitin

    # make the LABIN card (not too many FPHs! )
    head -1 \${tempfile}Ds |\\
    nawk '{printf "LABIN FP=%s SIGFP=%s ", \$1, \$2}' |\\
    cat >> \${tempfile}scaleit.in
    
    # no more than 6 at a time
    cat \${tempfile}Ds |\\
    nawk -v first=\$i '{++n} n>=first && n<(first+6){++i;\\
	printf "-\\nFPH%d=%s SIGFPH%d=%s ", i, \$1, i, \$2} \\
    END {print "\\nEND"}' |\\
    cat >> \${tempfile}scaleit.in
    
    # now actually run scaleit
    cat \${tempfile}scaleit.in |\\
    scaleit HKLIN \${tempfile}danos.mtz \\
            HKLOUT \${tempfile}danoscaled.mtz |\\
     tee -a \${tempfile}scaleit.log >> \$logfile
    
    # accumulate scaled datasets
    # (the output of this round is the input of the next one)
    mv \${tempfile}danoscaled.mtz \${tempfile}danos.mtz >& /dev/null
    @ i = ( \$i + 6 )
end
mv \${tempfile}danos.mtz \${tempfile}danoscaled.mtz >& /dev/null

# print out "weights" (effective weighting is 1/scale^2)
cat \${tempfile}scaleit.log |\\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\\
nawk '\$1 == "Derivative"{++i; print \$1, i, \$3}' >! \${tempfile}scales

cat \${tempfile}Ds \${tempfile}scales |\\
nawk 'NF==2{++n; label[n]=\$1; if(length(\$1)>maxlen) maxlen=length(\$1)}\\
    \$1 == "Derivative"{w=0; if(\$3+0!=0) w=1/(\$3*\$3)\\
    printf "%-" maxlen "s : %.3f\\n", label[\$2], w}' |\\
sort -nr -k 3


rm -f \${tempfile}scaleit.log >& /dev/null
rm -f \${tempfile}scales      >& /dev/null


combine_ano:
# Merge the scaled anomalous columns into one Dano/SIGDano pair.
# For each reflection the awk program forms a weighted mean with weight
# 1/sigD^2 and reports 1/sqrt(sum of weights) as its sigma.  The columns
# are dumped with an explicit FORMAT (3i5 followed by two f15.7 fields
# per data set) so that they can be read back field by field.
echo "combining anomalous differences into Dano"

# now do a sigma-weighted average of all anomalous diffs
set Ds = \`cat \${tempfile}Ds | wc -l\`
echo \$Ds | nawk '{print "NREF -1"; print "FORMAT \\047(3i5,"\$1*2"f15.7)\\047"}' |\\
mtzdump HKLIN \${tempfile}danoscaled.mtz |\\
nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\\
nawk '! /[A-Z]/ && NF>3{HKL=substr(\$0,1,15); sum=norm=n="";\\
    # run down list of D, sigD \\
    for(i=4;i<NF;i+=2){D=\$i;sigD=\$(i+1); w=0; \\
	# sigma of 0 means zero, but no weight \\
	if(sigD+0==0) norm+=0;\\
	# add up weigted sum \\
        if(sigD+0>0){w=1/(sigD*sigD); sum+=w*D; norm+=w;}}\\
    # do not print anything for "all-missing" HKLs \\
    # print zero for all-zero hkls \\
    if(norm==0) print HKL, 0, 0; \\
    # print sigma-weighted average (abs value? ) \\
    if(norm+0 > 0) print HKL, sum/norm, 1/sqrt(norm)}' |\\
cat >! \${tempfile}dano.hkl
rm -f \${tempfile}danoscaled.mtz >& /dev/null


# read the averaged anomalous differences back into an mtz
f2mtz HKLIN \${tempfile}dano.hkl HKLOUT \${tempfile}sortme.mtz << EOF-f2mtz >> \$logfile
CELL \$CELL
SYMM \$SGnum
LABOUT H K L Dano SIGDano
CTYPO  H H H F Q
EOF-f2mtz
rm -f \${tempfile}dano.hkl >& /dev/null

# sort on H K L before the file is used any further
echo "H K L" |\\
sortmtz HKLIN \${tempfile}sortme.mtz HKLOUT \${tempfile}dano.mtz >> \$logfile
rm -f \${tempfile}sortme.mtz

# indicate finish
echo ""

# summed anomalous diffs should now be loaded into:
# \${tempfile}dano.mtz, labeled as Dano SIGDano



calculate_iso:
###########################################################
# now we need to calculate isomorphous diffs 
# before we can treat them as we did the anomalous diffs
#
# For every pair of F columns the difference F1-F2 and the sigma
# sqrt(SIGF1^2+SIGF2^2) are computed in awk and stored as columns
# DisoN/SIGDisoN (N = pair number) of one cumulative mtz file.
# With exactly two F columns the single difference is labelled
# Diso/SIGDiso directly.


# jump ahead if there are no isomorphous datasets (why bother?)
if("\$Fs" < 2) then
    # make sure this doesn't exist (this is a signal later on)
    rm -f \${tempfile}diso.mtz >& /dev/null
    goto compute_k
endif

if(\$Fs == 2) then
    echo "calculating isomorphous (dispersive) difference as Diso"
else
    echo "subtracting isomorphous (dispersive) differences"
    echo "note: f' differences should all have the same sign! "
endif

# make sure this doesn't already exist
rm -f \${tempfile}disos.mtz >& /dev/null

# make list with largest difference first
# (each F is paired with every later F in the list, last one first)
cat \${tempfile}Fs |\\
nawk '{++n; F[n]=\$1} \\
    END{for(i=1;i<=n;++i){for(j=n;j>i;--j){if(i!=j){\\
    print F[i] " - " F[j];}}}}' |\\
cat >! \${tempfile}Fpairs
set Fpairs = \`cat \${tempfile}Fpairs | wc -l\`
set pair = 0

while ( \$pair < \$Fpairs )
    @ pair = ( \$pair + 1 )
    # retrieve the pair  
    set F1 = \`nawk -v pair=\$pair 'NR==pair{print \$1}' \${tempfile}Fpairs\`
    set F2 = \`nawk -v pair=\$pair 'NR==pair{print \$3}' \${tempfile}Fpairs\`
    
    # and get the sigmas too
    set SIGF1 = \`nawk -v F=\$F1 '\$1==F{print \$2}' \${tempfile}Fs\`
    set SIGF2 = \`nawk -v F=\$F2 '\$1==F{print \$2}' \${tempfile}Fs\`
    
    echo "\${F1}-\${F2}"
    
    # extract these columns from the file
    echo "LABIN FILE 1 E1=\$F1 E2=\$SIGF1 E3=\$F2 E4=\$SIGF2" |\\
    cad HKLIN1 \$mtzfile HKLOUT \${tempfile}dump.mtz >> \$logfile
    
    # the listing is read by fixed character positions: HKL in 1-13,
    # F1 in 14-25, SIGF1 in 26-35, F2 in 36-45, SIGF2 in 46-55;
    # lines containing capital letters or a question mark are skipped
    # dump the Fs as text, calculate F1-F2 sqrt(SIG1^2+SIG2^2)
    echo "NREF -1" |\\
    mtzdump HKLIN \${tempfile}dump.mtz |\\
    nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\\
    nawk '! /[A-Z?]/ && NF>1{HKL=substr(\$0,1,13);\\
        F1=substr(\$0,14,12); F2=substr(\$0,36,10);\\
	SIGF1=substr(\$0,26,10); SIGF2=substr(\$0,46,10); \\
     print HKL, F1-F2, sqrt(SIGF1*SIGF1 + SIGF2*SIGF2)}' |\\
    cat >! \${tempfile}diso.hkl
    rm -f \${tempfile}dump.mtz >& /dev/null
    
    # special case: only one difference set
    # (an empty pair number makes the labels below plain Diso and SIGDiso)
    if(\$Fs == 2) set pair = ""

    # read back into mtz format
    f2mtz HKLIN \${tempfile}diso.hkl HKLOUT \${tempfile}sortme.mtz << EOF-f2mtz >> \$logfile
    CELL \$CELL
    SYMM \$SGnum
    LABOUT H K L Diso\${pair} SIGDiso\${pair}
    CTYPO  H H H F Q
EOF-f2mtz
    rm -f \${tempfile}diso.hkl >& /dev/null
    
    # an emptied pair number is replaced by 999 so that the loop test fails
    if("\$pair" == "") set pair = 999    
    
    # sort it (just in case)
    echo "H K L" |\\
    sortmtz HKLIN \${tempfile}sortme.mtz HKLOUT \${tempfile}diso.mtz >> \$logfile
    rm -f \${tempfile}sortme.mtz
    
    # add columns into an mtz
    if(-e \${tempfile}disos.mtz) then
	cad HKLIN1 \${tempfile}disos.mtz HKLIN2 \${tempfile}diso.mtz \\
	    HKLOUT \${tempfile}cadded.mtz << EOF-cadadd >> \$logfile
	LABIN FILE 1 ALL
	LABIN FILE 2 ALL
EOF-cadadd
	# update the cumulative mtz
	mv \${tempfile}cadded.mtz \${tempfile}disos.mtz
    else
	# create the cumulative mtz
	mv \${tempfile}diso.mtz \${tempfile}disos.mtz
    endif
end


# check for trivial case: one difference dataset
if(\$Fs <= 2) then
    # no need to calculate relative weights for one dataset
    mv \${tempfile}disos.mtz \${tempfile}diso.mtz
    
    goto compute_k
endif


weigh_iso:
# Put all DisoN columns on a common scale with scaleit (Diso1 is the
# reference, at most six derivative columns per run) and report an
# effective weight of 1/scale^2 for each pair.  The weight table is
# sorted with a -k key; the old +N key form is not accepted by every sort.
echo "weighting isomorphous differences"

# now put all these differences on the same scale
set pair = 1
set Fpairs = \`cat \${tempfile}Fpairs | wc -l\`
echo -n "" >! \${tempfile}scaleit.log 
while ( \$pair <= \$Fpairs )
    cat << EOF-scaleitin >! \${tempfile}scaleit.in
RESOLUTION \$scaleRES
#refine scale
refine \$scaling
weight
EOF-scaleitin
    # first one is the reference
    echo -n "LABIN FP=Diso1 SIGFP=SIGDiso1 " >> \${tempfile}scaleit.in
    
    # scale 6 at a time
    cat \${tempfile}Fpairs |\\
    nawk -v first=\$pair '{++n} n>=first && n<(first+6){++i;\\
	printf "-\\nFPH%d=Diso%d SIGFPH%d=SIGDiso%d ", i, n, i, n;}\\
	END{print "\\nEND"}' |\\
    cat >> \${tempfile}scaleit.in
    
    # run scaleit
    cat \${tempfile}scaleit.in |\\
    scaleit HKLIN \${tempfile}disos.mtz \\
            HKLOUT \${tempfile}disoscaled.mtz  |\\
     tee -a \${tempfile}scaleit.log >> \$logfile
    rm -f \${tempfile}scaleit.in >& /dev/null

    @ pair = ( \$pair + 6 )
    
    # output is input for next round of scaling
    mv \${tempfile}disoscaled.mtz \${tempfile}disos.mtz >& /dev/null
end
mv \${tempfile}disos.mtz \${tempfile}disoscaled.mtz >& /dev/null


# print out "weights" (effective weighting is 1/scale^2)
cat \${tempfile}scaleit.log |\\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\\
nawk '\$1 == "Derivative"{++n; print \$1, n, \$3}' >! \${tempfile}scales

cat \${tempfile}Fpairs \${tempfile}scales |\\
nawk '\$2 == "-"{++n; label[n]=\$1 \$2 \$3; if(length(label[n])>maxlen) maxlen=length(label[n])}\\
    \$1 == "Derivative"{w=0; if(\$3+0!=0) w=1/(\$3*\$3)\\
    printf "%-" maxlen "s : %.3f\\n", label[\$2], w}' |\\
sort -nr -k 3

rm -f \${tempfile}scaleit.log >& /dev/null
rm -f \${tempfile}scales      >& /dev/null



combine_iso:
# Merge the scaled DisoN columns into one Diso/SIGDiso pair, using the
# same weighted mean as for the anomalous differences: weight 1/sigD^2,
# sigma of the mean 1/sqrt(sum of weights).
echo "combining isomorphous differences into Diso"

# add these scaled data sets together (sigma-weighted again)
# (hopefully, our "ordering" procedure has made sure all
#  these differences have the same sign)
set Fpairs = \`cat \${tempfile}Fpairs | wc -l\`
rm -f \${tempfile}Fpairs >& /dev/null

echo \$Fpairs | nawk '{print "NREF -1"; print "FORMAT \\047(3i5,"\$1*2"f15.7)\\047"}' |\\
mtzdump HKLIN \${tempfile}disoscaled.mtz |\\
nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\\
nawk '! /[A-Z]/ && NF>3{HKL=substr(\$0,1,15); sum=norm=n="";\\
    # run down list of D, sigD \\
    for(i=4;i<NF;i+=2){D=\$i;sigD=\$(i+1); w=0; \\
	# sigma of 0 means zero, but no weight \\
	if(sigD+0==0) norm+=0;\\
	# add up weigted sum \\
        if(sigD+0>0){w=1/(sigD*sigD); sum+=w*D; norm+=w;}}\\
    # do not print anything for "all-missing" HKLs \\
    # print zero for all-zero hkls \\
    if(norm==0) print HKL, 0, 0; \\
    # print sigma-weighted average (abs value?) \\
    if(norm+0 > 0) print HKL, sum/norm, 1/sqrt(norm)}' |\\
cat >! \${tempfile}diso.hkl
rm -f \${tempfile}disoscaled.mtz >& /dev/null

# read the averaged isomorphous differences back into an mtz
f2mtz HKLIN \${tempfile}diso.hkl HKLOUT \${tempfile}sortme.mtz << EOF-f2mtz >> \$logfile
CELL \$CELL
SYMM \$SGnum
LABOUT H K L Diso SIGDiso
CTYPO  H H H F Q
EOF-f2mtz
rm -f \${tempfile}diso.hkl >& /dev/null

# sort on H K L; the result is the diso.mtz scratch file used by compute_k
echo "H K L" |\\
sortmtz HKLIN \${tempfile}sortme.mtz HKLOUT \${tempfile}diso.mtz >> \$logfile
rm -f \${tempfile}sortme.mtz

# indicate finish



compute_k:
##############################################################
# now we need the "k" that will put Diso and Dano on the same
# scale: k/2 = <|Diso|>/<|Dano|>
#
# If only one kind of difference is available it is simply renamed
# to FH/SIGFH and control jumps to sort_final; no W column exists in
# that case, so the weighted Patterson is switched off.

##############################################################
echo ""

# handle special cases
if((\$Ds == 0)&&(\$Fs <= 1)) then
    # this should never happen, but...
    echo "ERROR: no difference data in \${mtzfile}! "
    goto Help
endif
if(\$Ds == 0) then
    # anomalous difference data is totally missing
    # just rename the isomorphous differences
    echo "WARNING: treating Diso data as FH (FH is better with Dano and Diso)"
    cad hklin1 \${tempfile}diso.mtz hklout \${tempfile}sortme.mtz << EOF >> \$logfile
    labin file 1 E1=Diso E2=SIGDiso
    labou file 1 E1=FH   E2=SIGFH
EOF
    rm -f \${tempfile}diso.mtz >& /dev/null
    rm -f \${tempfile}dano.mtz >& /dev/null
    set wpattfile = ""

    goto sort_final
endif
if(\$Fs <= 1) then
    # isomorphous difference data is totally missing
    # just re-name the anomalous differences
    echo "WARNING: treating Dano data as FH (FH is better with Dano and Diso)"
    cad hklin1 \${tempfile}dano.mtz hklout \${tempfile}sortme.mtz << EOF >> \$logfile
    labin file 1 E1=Dano E2=SIGDano
    labou file 1 E1=FH   E2=SIGFH
EOF
    rm -f \${tempfile}diso.mtz >& /dev/null
    rm -f \${tempfile}dano.mtz >& /dev/null
    set wpattfile = ""

    goto sort_final
endif

##############################################################
echo -n "scaling Diso to Dano  "

# use scaleit to put Diso and Dano on the same scale
# (both pairs of columns are first merged into one file)
cad HKLIN1 \${tempfile}diso.mtz HKLIN2 \${tempfile}dano.mtz \\
HKLOUT \${tempfile}diso_dano.mtz << EOF-cad >> \$logfile
LABIN FILE 1 ALL
LABIN FILE 2 ALL
EOF-cad
rm -f \${tempfile}diso.mtz >& /dev/null
rm -f \${tempfile}dano.mtz >& /dev/null


# Diso is given to scaleit as the native (FP) and Dano as the
# derivative (FPH1); the scaled columns end up in the scaled.mtz scratch file
scaleit HKLIN \${tempfile}diso_dano.mtz \\
 HKLOUT \${tempfile}scaled.mtz << EOF-scaleit | tee \${tempfile}scaleit.log >> \$logfile
RESOLUTION \$scaleRES
#refine scale
refine \$scaling
weight
LABIN FP=Diso SIGFP=SIGDiso FPH1=Dano SIGFPH1=SIGDano
end
EOF-scaleit
rm -f \${tempfile}diso_dano.mtz >& /dev/null
rm -f \${tempfile}scaleit.in >& /dev/null


# print out "k" value from scaling
# (k is reported as twice the derivative scale found in the scaleit log)
cat \${tempfile}scaleit.log |\\
nawk '/APPLICATION OF SCALES/,/--------------------------/' |\\
nawk '\$1 == "Derivative"{printf "k= %.3f\\n", 2*\$3}'

rm -f \${tempfile}scaleit.log >& /dev/null


combine_diso_dano:
##############################################################
# now combine Diso and Dano as the "best" estimate of total FH
#
# The scaled columns are read back by fixed character positions
# (HKL 1-13, Diso 14-25, SIGDiso 26-35, Dano 36-45, SIGDano 46-55) and
# FH = sqrt(Diso^2 + Dano^2) is written out with a propagated sigma and
# a weight W.

echo "calculating FH = sqrt( Diso^2 + (k/2)^2 * Dano^2 )"

# also do a "Patterson weight" of 1/(sigma(FH^2))^2
# NOTE(review): the awk below stores W = 1/sqrt(varFH), and varFH2 is
# computed but never used - confirm which weight is actually intended.
# The reciprocal of the largest W is written to a scratch norm file and
# later placed in the W position of the f2mtz SCALE card.
echo "NREF -1" |\\
mtzdump HKLIN \${tempfile}scaled.mtz |\\
nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' |\\
nawk '! /[A-Z]/ && NF>1{ HKL=substr(\$0,1,13); \\
    Diso=substr(\$0,14,12); SIGDiso=substr(\$0,26,10); \\
    Dano=substr(\$0,36,10); SIGDano=substr(\$0,46,10); \\
    FH=fh=sqrt(Diso*Diso + Dano*Dano); if(fh==0) fh=1;\\
    varFH  = (Diso*SIGDiso/fh)^2 + (Dano*SIGDano/fh)^2;\\
    varFH2 = 4 * FH^2 * varFH; \\
    W=0; if(varFH) W = 1/sqrt(varFH); if(W>maxW) maxW=W;\\
    print HKL, FH, sqrt(varFH), W} \\
    END{if(maxW) print 1/maxW > "'\${tempfile}'norm"}' |\\
cat >! \${tempfile}FH.hkl

# fall back to a factor of 1 when no single value could be read back
set norm = \`tail -1 \${tempfile}norm\`
rm -f \${tempfile}norm >& /dev/null
if(\$#norm != 1) set norm = 1

# read FH back into mtz (finally)
f2mtz HKLIN \${tempfile}FH.hkl HKLOUT \${tempfile}sortme.mtz << EOF-f2mtz >> \$logfile
CELL \$CELL
SYMM \$SGnum
LABOUT H K L FH SIGFH W
CTYPO  H H H F Q W
SCALE  1 1 1 1 1 \$norm
EOF-f2mtz
rm -f \${tempfile}FH.hkl >& /dev/null


sort_final:
# Write the final FH file and stop with status 9 if that failed.
# sort it (for good measure)
echo "H K L" |\\
sortmtz HKLIN \${tempfile}sortme.mtz HKLOUT \$outfile >> \$logfile
if((! \$status)&&(-e "\$outfile")) then
    echo "\$outfile is ready."
else
    echo "ERROR! see \$logfile for what happened..."
    exit 9
endif

rm -f \${tempfile}sortme.mtz
##############################################################


# use scaleit to get the recommended DIFF
# (FH is compared with a copy of itself scaled by 0.000001; only the
#  acceptable differences value in the scaleit output is used, and only
#  when DIFF is still empty; 1000 is the last-resort default)
scaleit hklin \$outfile << EOF >! \${tempfile}scaleit
analyze
labin FP=FH SIGFP=SIGFH FPH1=FH SIGFPH1=SIGFH
SCALE FPH1 0.000001
END
EOF
if("\$DIFF" == "") set DIFF = \`nawk '/acceptable differences/{print \$NF}' \${tempfile}scaleit\`
rm -f \${tempfile}scaleit >& /dev/null
if("\$DIFF" == "") set DIFF = 1000




Shelx_format:
# optional SHELX copy of FH; skipped when no shelx file name is set
if("\$shelxfile" == "") goto Fourier

# dump FH out in shelx format too.
# (the FSQUARED card is passed to mtz2various along with OUTPUT SHELX)
mtz2various HKLIN \$outfile HKLOUT \$shelxfile << EOF-shelx >> \$logfile
OUTPUT SHELX
FSQUARED
LABIN FP=FH SIGFP=SIGFH
END
EOF-shelx

if((! \$status)&&(-e "\$shelxfile")) echo "\$shelxfile is the SHELX version of \$outfile."


Fourier:
###########################################
# Phased Fourier of FH; skipped when the mtz file offered no phase column
# or no map file name is set.
# With both kinds of difference available, a Diso map and a Dano map are
# calculated separately and added; otherwise FH itself is transformed.
if(("\$PHASE" == "")||("\$fourfile" == "")) goto Patterson
# calculate a phased Fourier of FH
# first, we need to retrieve the phase columns
set FOM_cad = ""
if("\$FOM" != "") set FOM_cad = "E2=\$FOM"
# set up FOM weight
set W = ""
if("\$FOM" != "") set W = "W=\$FOM"

# special case of Diso or Dano only
if((\$Ds == 0)||(\$Fs <= 1)) then
    # no double-difference data
    set E1 = "E1=F"
    set fftF1="F1=FH"
    if(\$Fs <= 1) then
	# anomalous data only
	set E1="E1=D"
	set fftF1="DANO=FH"
    endif
    
    # but still want phased FH map
    cad hklin1 \$outfile hklin2 \$mtzfile \\
	hklout \${tempfile}phased.mtz << EOF-cad >> \$logfile
    LABIN FILE 1 E1=FH E2=SIGFH
    LABIN FILE 2 E1=\$PHASE \$FOM_cad
    CTYPO FILE 1 \$E1 E2=Q
EOF-cad

    # ordinary, boring difference Fourier
    fft HKLIN \${tempfile}phased.mtz MAPOUT \${tempfile}FH.map << EOF-fft >> \$logfile
    TITLE \${hiRES}A map of FH @ \$PHASE \$FOM
    RESOLUTION \$scaleRES
    LABIN \$fftF1 SIG1=SIGFH PHI=\$PHASE \$W
    EXCLUDE SIG1 0
EOF-fft
    
    goto norm_Four
endif

# calculate a combined, phased difference Fourier
cad hklin1 \${tempfile}scaled.mtz hklin2 \$mtzfile \\
    hklout \${tempfile}phased.mtz << EOF-cad >> \$logfile
LABIN FILE 1 E1=Diso E2=SIGDiso E3=Dano E4=SIGDano
LABIN FILE 2 E1=\$PHASE \$FOM_cad
CTYPO FILE 1 E1=F E2=Q E3=D E4=Q
EOF-cad

# now we make the isomorphous difference Fourier
fft HKLIN \${tempfile}phased.mtz MAPOUT \${tempfile}Diso.map << EOF-fft >> \$logfile
TITLE \${hiRES}A map of Diso @ \$PHASE \$FOM
RESOLUTION \$scaleRES
LABIN F1=Diso SIG1=SIGDiso PHI=\$PHASE \$W
EXCLUDE SIG1 0
EOF-fft

# then make the anomalous difference Fourier (phases rotated 90 degrees)
# The sigma given with DANO is the sigma of Dano itself, the same pairing
# the single-difference branch above uses (DANO=FH with SIG1=SIGFH).
fft HKLIN \${tempfile}phased.mtz MAPOUT \${tempfile}Dano.map << EOF-fft >> \$logfile
TITLE \${hiRES}A map of Dano @ \$PHASE \$FOM
RESOLUTION \$scaleRES
LABIN DANO=Dano SIG1=SIGDano PHI=\$PHASE \$W
EXCLUDE SIG1 0
EOF-fft

# try to make sure we get the right sign
# assume map has at least one large peak (which should be positive)
# (the sign file holds 1 or -1, whichever density extreme is larger in size)
echo "go" | mapdump mapin \${tempfile}Diso.map |\\
nawk '/Minimum density/{print -\$NF, -1} /Maximum density/{print \$NF, 1}' |\\
sort -nr |\\
nawk 'NR==1{print \$NF}' >! \${tempfile}sign
set diso_sign = \`cat \${tempfile}sign\`
rm -f \${tempfile}sign >& /dev/null

# invert the Diso map if it seems to be upside-down
# (never done when the user dictated the order of the F columns)
#if(("\$diso_sign" == "-1")) then
if(("\$diso_sign" == "-1")&&(! \$?USER_ORDER)) then
    echo "inverting sequence of Diso for difference Fourier"
    echo "SCALE FACTOR -1 0" |\\
    mapmask mapin1 \${tempfile}Diso.map mapout \${tempfile}temp.map >> \$logfile
    mv \${tempfile}temp.map \${tempfile}Diso.map >& /dev/null
endif

# add these two maps together (equivalent to vector sum to FH)
echo "MAPS ADD" |\\
mapmask mapin1 \${tempfile}Diso.map mapin2 \${tempfile}Dano.map \\
mapout \${tempfile}FH.map >> \$logfile

norm_Four:
# normalize it for output
echo "SCALE SIGMA" |\\
mapmask mapin \${tempfile}FH.map mapout \$fourfile >> \$logfile

if((! \$status)&&(-e "\$fourfile")) echo "\$fourfile is the map of \$PHASE applied to FH"

rm -f \${tempfile}phased.mtz >& /dev/null
rm -f \${tempfile}Diso.map >& /dev/null
rm -f \${tempfile}Dano.map >& /dev/null
rm -f \${tempfile}FH.map >& /dev/null


Patterson:
###########################################
# Patterson maps of FH: an unweighted one, then (only if a W column was
# produced) one with W applied.  Both are written sigma-scaled by mapmask.
if("\$pattfile" == "") goto Clean_up

# calculate the unweighted Patterson map (if desired)
# (F2 is the same FH column scaled by 0.000001 - presumably so that the
#  F1/F2 Patterson is effectively that of FH alone; confirm against fft)
fft HKLIN \$outfile MAPOUT \${tempfile}FH.map << EOF-fft >> \$logfile
TITLE \${hiRES}A Patterson of FH
RESOLUTION \$scaleRES
PATTERSON
LABIN F1=FH SIG1=SIGFH F2=FH SIG2=SIGFH
SCALE F2 0.000001 0
EXCLUDE DIFF \$DIFF
EXCLUDE SIG1 0
EOF-fft
# peak list goes to a scratch pdb file that Clean_up removes
peakmax MAPIN \${tempfile}FH.map XYZOUT \${tempfile}.pdb << EOF-pick >> \$logfile
THRESHOLD RMS 3
NUMPEAKS 50
EOF-pick
# extend it to whole unit cell?
mapmask mapin \${tempfile}FH.map mapout "\$pattfile" << EOF >> \$logfile
scale sigma
#xyzlim 0 1 0 1 0 1
EOF
if((! \$status)&&(-e "\$pattfile")) echo "\$pattfile is the Patterson of FH."
rm -f \${tempfile}FH.map >& /dev/null


# wpattfile is emptied by compute_k when only one kind of difference exists
if("\$wpattfile" == "") goto Clean_up
# calculate the weighted Patterson map (if desired)
fft HKLIN \$outfile MAPOUT \${tempfile}FH.map << EOF-fft >> \$logfile
TITLE \${hiRES}A Patterson of FH/SIGFH
RESOLUTION \$scaleRES
PATTERSON
LABIN F1=FH SIG1=SIGFH W=W F2=FH SIG2=SIGFH
SCALE F2 0.000001 0
#EXCLUDE DIFF \$DIFF
EXCLUDE SIG1 0
EOF-fft
peakmax MAPIN \${tempfile}FH.map XYZOUT \${tempfile}.pdb << EOF-pick >> \$logfile
THRESHOLD RMS 3
NUMPEAKS 50
EOF-pick
# extend it to whole unit cell?
mapmask mapin \${tempfile}FH.map mapout "\$wpattfile" << EOF >> \$logfile
scale sigma
#xyzlim 0 1 0 1 0 1
EOF
if((! \$status)&&(-e "\$wpattfile")) echo "\$wpattfile is the Patterson of FH / sig(FH)^2"
rm -f \${tempfile}FH.map >& /dev/null


# clean up
# (common exit point: remove whatever scratch files are still around)
Clean_up:

rm -f \${tempfile}scaled.mtz >& /dev/null
rm -f \${tempfile}Fs     >& /dev/null
rm -f \${tempfile}Ds     >& /dev/null
rm -f \${tempfile}Fpairs >& /dev/null
rm -f \${tempfile}.pdb   >& /dev/null
# still on disk when the automatic ordering step jumps here after
# finding no isomorphous difference at all
rm -f \${tempfile}diso_dano >& /dev/null


exit






Setup:
#################################################

  ####   ######   #####  #    #  #####
 #       #          #    #    #  #    #
  ####   #####      #    #    #  #    #
      #  #          #    #    #  #####
 #    #  #          #    #    #  #
  ####   ######     #     ####   #

#################################################
#
#   gather information on:
#    mtz file
#    data sets
#    resolution limits
#    sigma cuttoff (for map generation)
#    difference cutoff
#
##################################################
# DIFF keeps any value it already has; otherwise it starts out empty
if(! \$?DIFF) set DIFF = ""

# scan the command line for files
# (every existing *.mtz argument replaces the input file name; the first
#  *.mtz argument that does not exist yet becomes the output file name)
foreach arg ( \$* )
    # warn about probable mispellings
    if("\$arg" =~ *.mtz) then
	if(-e "\$arg") then
	    set mtzfile = "\$arg"
	else
	    if(! \$?useroutfile) then
		set useroutfile = "\$arg"
		set outfile = "\$arg"
	    endif
	endif
    endif
end


# now all filenames have been initialized

# nothing to work on: show the usage text
if(! -e "\$mtzfile") goto Help

##################################################
# get crystal and dataset information from the mtz file
# (everything is picked out of the text of one mtzdump run)
echo "go" | mtzdump HKLIN \$mtzfile >! \${tempfile}mtzdump
set CELL  = \`nawk '/Cell Dimensions/{getline;getline;print}' \${tempfile}mtzdump\`
set SGnum = \`nawk '/Space group/{print \$NF+0}' \${tempfile}mtzdump\`
set SG    = \`nawk -v num=\$SGnum '\$1==num && NF>5{print \$4}' \${CLIBD}/symop.lib \`
set hiRES = \`nawk '/Resolution Range/{getline;getline;print \$6}' \${tempfile}mtzdump\`

# get column label names from the mtz file
# The datasets scratch file gets one line per F, D, Q, P or W column:
# a type tag, a running group number and the column label.  Q columns
# are tagged S followed by the tag of the line before them (SF, SD),
# and the group number only advances on F, D and P columns.
nawk 'NF>3' \${tempfile}mtzdump |\\
nawk '\$(NF-1)=="F"{print "F", \$NF}\\
      \$(NF-1)=="D"{print "D", \$NF}\\
      \$(NF-1)=="Q"{print "S", \$NF}\\
      \$(NF-1)=="P"{print "P", \$NF}\\
      \$(NF-1)=="W"{print "W", \$NF}' |\\
nawk '/^F/{++n} /^D/{++n} /^P/{++n} {printf "%s", \$1; \\
       if(\$1=="S") printf "%s", last;\\
printf " %d %s \\n",n, \$2; last=\$1}' |\\
cat >! \${tempfile}datasets
rm -f \${tempfile}mtzdump >& /dev/null

# check extent of available data
set temp = \`nawk '/^F/ || /^D/' \${tempfile}datasets |& wc -l\`
if(\$temp < 1) then
    # this is useless, bail out now
    rm -f \${tempfile}datasets >& /dev/null
    echo "ERROR: no usable data in \${mtzfile}! "
    set mtzfile = ""
    goto Help
endif

# get complete, unique lists of DANO/SIGDANOs
# (a D column without a matching sigma in the same group is dropped)
cat \${tempfile}datasets |\\
nawk '\$1=="D"{D[\$2]=\$NF} \$1=="SD"{S[\$2]=\$NF} \\
      END{for(i in D) print i, D[i], S[i];}' |\\
sort -un |\\
nawk 'NF==3{print \$2,\$3}' |\\
cat >! \${tempfile}Ds

# get complete, unique lists of F/SIGFs
cat \${tempfile}datasets |\\
nawk '\$1=="F"{F[\$2]=\$NF} \$1=="SF"{S[\$2]=\$NF} \\
      END{for(i in F) print i, F[i], S[i];}' |\\
sort -un |\\
nawk 'NF==3{print \$2,\$3}' |\\
cat >! \${tempfile}Fs

# get complete, unique lists of Phases/FOMs
# (a phase is kept even when no W column shares its group)
cat \${tempfile}datasets |\\
nawk '\$1=="P"{P[\$2]=\$NF} \$1=="W"{W[\$2]=\$NF} \\
      END{for(i in P) print i, P[i], W[i];}' |\\
sort -un |\\
nawk 'NF>=2{print \$2,\$3, " "}' |\\
cat >! \${tempfile}Ps


# one last pass through command line
# allow user overrides of all internal variables
#
# Each word is looked at in turn, together with its neighbours:
#  - a column label found in the mtz file is either collected as a
#    wanted label or, when the previous word set the NO flag, removed
#    from the F, D and phase lists
#  - words naming existing files are skipped
#  - words starting with a digit may set resolution limits, the sigma
#    cutoff or the difference cutoffs, depending on their suffix or on
#    the neighbouring word
set i = 0
echo -n "" >! \${tempfile}userlabels
while( \$i < \$#argv )
    @ i = ( \$i + 1 )
    @ nexti = ( \$i + 1 )
    @ lasti = ( \$i - 1 )
    # keep the neighbour indices inside the argument list
    if(\$nexti > \$#argv) set nexti = \$#argv
    if(\$lasti < 1) set lasti = 1
    set arg = "\$argv[\$i]"
    
    # see if a dataset label was given
    egrep " \$arg " \${tempfile}datasets >& /dev/null
    if(! \$status) then
	if(\$?NO) then
	    # user doesn't want this label
	    # filter it out of the input files
	    egrep -v "^\$arg " \${tempfile}Fs >! \${tempfile}
	    mv \${tempfile} \${tempfile}Fs
	    egrep -v "^\$arg " \${tempfile}Ds >! \${tempfile}
	    mv \${tempfile} \${tempfile}Ds
	    egrep -v "^\$arg " \${tempfile}Ps >! \${tempfile}
	    mv \${tempfile} \${tempfile}Ps
	else
	    # must want only this label?
	    cat \${tempfile}datasets |\\
	    nawk -v label=\$arg 'NF>2 && \$NF==label{print \$NF}' |\\
	    cat >> \${tempfile}userlabels
	endif
	# "NO" stays set for next word
	continue
    endif
    
    # only look at non-file words
    if(-e "\$arg") then
	unset NO
	continue
    endif
    
    if("\$arg" =~ [0-9]*) then
	# we have a number
	if(("\$arg" =~ *A)||("\$argv[\$nexti]" == "A")) then
	    # user-preferred resolution limits
	    # (one number sets the high limit; two numbers joined by a dash
	    #  set both limits, the larger one being the low-resolution limit)
	    set temp = \`echo "\$arg" | nawk 'BEGIN{FS="-"} \$1+0 > 0.1{print \$1+0} \$2+0 > 0.1{print \$2+0}'\`
	    if(\$#temp != 1) then
		set temp = \`echo \$temp | nawk '\$1>\$2{print \$1, \$2} \$2>\$1{print \$2, \$1}'\`
		if(\$#temp == 2) then
		    set loRES = "\$temp[1]"
		    set hiRES = "\$temp[2]"
		endif
	    else
		if("\$temp" != "") set hiRES = "\$temp"
	    endif
	endif
	    
	if(("\$arg" =~ *[Ss]igma)||("\$argv[\$nexti]" =~ [Ss]igma)) then
	    set DATA_cutoff = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	endif
	
	if(("\$arg" =~ *[Dd]iff)||("\$argv[\$lasti]" =~ [Dd]iff)) then
	    set MAX_diso = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    set MAX_dano = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	endif
	if(("\$arg" =~ *[Dd]iso)||("\$argv[\$lasti]" =~ [Dd]iso)) then
	    set MAX_diso = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	endif
	if(("\$arg" =~ *[Dd]ano)||("\$argv[\$lasti]" =~ [Dd]ano)) then
	    set MAX_dano = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	endif
    endif
    
    # allow "NO" logic to carry through to next word(s)
    unset NO
    if(("\$arg" == "no")||("\$arg" == "not")) set NO
    if(("\$arg" == "don't")||("\$arg" == "ignore")) set NO
    if("\$arg" == "except") set NO
end

rm -f \${tempfile}datasets >& /dev/null

# turn the "user" labels into real label files
# (if the user named at least one anomalous column, only the named
#  ones are kept, in the order they were given)
cat \${tempfile}Ds \${tempfile}userlabels |\\
nawk 'NF==2{set[\$1]=\$0}\\
      NF==1{print set[\$1]}' |\\
nawk 'NF==2' >! \${tempfile}
set Ds = \`cat \${tempfile} | wc -l\`
if(\$Ds > 0) then
    # user mentioned which Ds to use
    mv \${tempfile} \${tempfile}Ds
endif
set Ds = \`cat \${tempfile}Ds | wc -l\`

# same for Fs
# (a user selection is only honoured when it names more than one F column;
#  it then also fixes the order and sets the USER_ORDER flag)
cat \${tempfile}Fs \${tempfile}userlabels |\\
nawk 'NF==2{set[\$1]=\$0}\\
      NF==1{print set[\$1]}' |\\
nawk 'NF==2' >! \${tempfile}
set Fs = \`cat \${tempfile} | wc -l\`
if(\$Fs > 1) then
    # user mentioned which Fs to use
    mv \${tempfile} \${tempfile}Fs
    
    # get user-specified F dataset order 
    set order = \`nawk 'NF==2{print \$1}' \${tempfile}Fs\`
    set USER_ORDER
endif
set Fs = \`cat \${tempfile}Fs | wc -l\`

# same for Phases
cat \${tempfile}Ps \${tempfile}userlabels |\\
nawk 'NF==1{print set[\$1]}\\
           {set[\$1]=\$0}' |\\
nawk 'NF!=0' >! \${tempfile}
set Ps = \`cat \${tempfile} | wc -l\`
if(\$Ps > 0) then
    # user mentioned which Phase to use
    tail -1 \${tempfile} >! \${tempfile}Ps
endif
# only use one phase
# (the last line of the phase list supplies the phase and, if present, FOM labels)
tail -1 \${tempfile}Ps >! \${tempfile}
set PHASE = \`nawk '{print \$1}' \${tempfile}\`
set FOM   = \`nawk '{print \$2}' \${tempfile}\`


# clean up
rm -f \${tempfile} >& /dev/null
rm -f \${tempfile}Ps >& /dev/null
rm -f \${tempfile}userlabels >& /dev/null

goto Return_from_Setup


####################################################
# the notes below sit after the final goto and are never executed

The Future:
re-order Diso if scales come out screwy?
make the interface a little slicker, less wordy?
use correlation instead of scale as a weight?
implement use of max_DANO, DATA_cutoff, etc. ?

EOF-script
chmod a+x $SCRIPT

















###############################################################################
#
#	revise.com
#
###############################################################################
# Generate revise.com.  The here-document is unquoted, so unescaped variables
# (nawk, finalMTZ, wavenames, wavelengths, Ee) are expanded now, while this
# file is written; backslash-escaped ones are expanded when revise.com runs.
set SCRIPT = $SCRIPT_dir/revise.com
cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#
#	revise.com 
#
#	calculate an estimate of FM for the heavy atoms alone
#	using CCP4's "revise" program
#
#	FM should give cleaner Pattersons, difference Fouriers,
#	and direct methods.
#
#	Do NOT combine differences from data sets expected to have
#	different heavy atom locations.  That would be silly.
#
#
# set this to wherever your awk program is
# (plain awk is used instead when the configured program cannot be run)
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

# defaults (can be overridden by command-line input)
set mtzfile     = "${finalMTZ}" 	# MAD data set
set fp          = ""			# set these if you know what they are
set fpp         = ""			# set these if you know what they are
set Fs		= "$wavenames"	# overridden by contents of mtz
set wavelengths = "$wavelengths"	# only important if fp/fpp are unassigned
set Ee		= "$Ee"			# only important if fp/fpp are unassigned

set outfile     = "./FM.mtz"		# contains FM, SIGFM
set shelxfile   = "./fm.hkl"		# same thing, shelx format
set pattfile    = "./FM_Patt.map"	# Combined Patterson map

# prefix for every scratch file; ends in the process id of this run
set tempfile = ./revise_temp\$\$


# initialize internal variables
# (resolution limits handed to the RESO card of the Patterson fft)
set loRES = 1000
set hiRES = ""

# exclusion limits handed to the EXCL card of revise
set DATA_cutoff = 0	# sigma cutoff
set MAX_dano 	= 0.5	# upper Dano cutoff
set MAX_diso	= 0.1	# upper Diso cutoff

# with no arguments at all, just print the usage message
if(\$#argv == 0) goto Help

# this procedure (re)sets most of the above variables
# from either the provided files, or the command line
goto Setup

# Help: print the usage message and leave with exit status 2
Help:
cat << EOF

usage: \$0 \$mtzfile \$Fs

where:
\$mtzfile	- contains at least two sets of F and DANO 
		  you want to combine


EOF

exit 2
#
#   Setup (at the bottom of the script) jumps back to the label below
#   after it has done the following:
#   1) scan the command line for the mtz file
#   2) set the CELL, SG, and other variables
#   3) generate dataset name list:
#      \${tempfile}LABIN and \${tempfile}waves
Return_from_Setup:


################################################################
#   initial report on intended program flow
#   (the data set table itself is printed further down, once the
#   scattering factors for every data set are known)
################################################################
echo "running revise on \$mtzfile"
echo "with:"

Crossec:
################################################################
# Fill in fp and fpp by running crossec for element Ee at every wavelength.
# The whole section is skipped when fp was already given.
if("\$fp" != "") goto Revise

# make a lookup table for recognizing sorted wavelengths
# (sorted numerically on the wavelength in column 2; the -k form is used
# because current sort programs no longer accept the old +1 key syntax)
echo "\$wavelengths" | nawk '{for(i=1;i<=NF;++i) print i, \$i, "order"}' |\\
sort -n -k 2 |\\
cat >! \${tempfile}waveorder

# calculate f' and f" of \$Ee at the provided wavelengths
crossec << EOF >! \${tempfile}crossec.log
ATOM \$Ee
NWAVE \$#wavelengths \$wavelengths
END
EOF
# log will be printed out and deleted later

# condense crossec output to vital info only
# (four-field lines whose last field is purely numeric, sorted the same
# way as the lookup table so both files line up row by row)
cat \${tempfile}crossec.log |\\
nawk 'NF==4 && \$NF !~ /[^0-9\\.]/' |\\
sort -n -k 2 |\\
cat >! \${tempfile}sortedwaves

# now put the wavelengths back in the order we originally had them
# (row n of the crossec table receives the data set number stored in
# row n of the lookup table, then everything is sorted on that number)
cat \${tempfile}waveorder \${tempfile}sortedwaves |\\
nawk '\$NF=="order"{++i; order[i]=\$1} \\
      \$NF!="order"{++n; print order[n], \$2, \$3, \$4}' |\\
sort -n >! \${tempfile}fp_fdp

# load f' and f" into variables (in dataset order)
set fp  = \`nawk '{print \$3}' \${tempfile}fp_fdp\`
set fpp = \`nawk '{print \$4}' \${tempfile}fp_fdp\`

# clean up
rm -f \${tempfile}fp_fdp >& /dev/null
rm -f \${tempfile}waveorder >& /dev/null
rm -f \${tempfile}sortedwaves >& /dev/null




Revise:
################################################################
# Build the WAVE cards for revise from four small tables that are all
# keyed on the data set number in their first column.
#
# make a table of f' f" values (vs dataset number)
echo "\$fp" | nawk '{for(i=1;i<=NF;++i) print i, \$i, "fp"}' |\\
cat >! \${tempfile}fp
echo "\$fpp" | nawk '{for(i=1;i<=NF;++i) print i, \$i, "fpp"}' |\\
cat >! \${tempfile}fpp

# same thing for wavelengths (vs dataset number)
echo "\$wavelengths" | nawk '{for(i=1;i<=NF;++i) printf "%d %.4f wave\\n", i, \$i}' |\\
cat >! \${tempfile}waves

# make a table of datasets (vs dataset number)
# (number and label are taken from the FPHn=label entries of the LABIN file)
cat \${tempfile}LABIN |\\
nawk '/^FPH/{split(\$1,w,"="); i=substr(\$1,4)+0; print i, w[2], "set"}' |\\
cat >! \${tempfile}datasets

# combine all this information into revise "WAVE" keywords
# (one card per data set, sorted numerically on the data set number in
# column 2; the -k form is used because current sort programs no longer
# accept the old +1 key syntax)
cat \${tempfile}datasets \${tempfile}waves \${tempfile}fp \${tempfile}fpp |\\
nawk '\$NF=="fp"{fp[\$1]=\$2} \$NF=="fpp"{fpp[\$1]=\$2} \\
      \$NF=="set"{set[\$1]=\$2} \$NF=="wave"{wave[\$1]=\$2} \\
      END{for(i in set) \\
      print "WAVE", i, "LAM", wave[i], "FPR", fp[i], "FDP", fpp[i]}' |\\
sort -n -k 2 >! \${tempfile}WAVE

# clean up
rm -f \${tempfile}datasets >& /dev/null
rm -f \${tempfile}waves    >& /dev/null
rm -f \${tempfile}fp       >& /dev/null
rm -f \${tempfile}fpp      >& /dev/null

# display f' f" stuff for user:
# (label, wavelength and both scattering factors, one row per WAVE card)
echo 'dataset wavelength  f'"'"'     f"   '
cat \${tempfile}LABIN \${tempfile}WAVE |\\
nawk '/^FPH/{split(\$1,w,"="); i=substr(\$1,4)+0; F[i]=w[2]} \\
/^WAVE/{printf "%7s  %7.5fA  %s %s\\n",F[\$2],\$4,\$6,\$8}'
echo ""
echo ""
# only ask for confirmation when standard output is a terminal
test -t 1
if(! \$status) then
    echo "is this okay? [Yes]"
    echo -n "-> "
    set temp = "\$<"
    if("\$temp" =~ [Nn]*) then
	echo "please either change the order of your data sets (on the command line)"
	echo "or edit \$0 to enter in the proper f' and f"\\"" values."
	# user declined: remove every scratch file that still exists,
	# including the crossec log that has not been printed yet
	rm -f \${tempfile}LABIN \${tempfile}WAVE \${tempfile}crossec.log >& /dev/null
	exit 3
    endif
endif

# print this out for user's benefit
# (the log only exists when the crossec step above was run)
if(-e \${tempfile}crossec.log) then
    cat \${tempfile}crossec.log
    rm -f \${tempfile}crossec.log >& /dev/null
endif

################################################################
# now actually run revise!
# Input columns come from the LABIN file and the per data set
# wavelength and scattering factors from the WAVE file; both are
# pulled into the keyword input with the @file syntax.  The EXCL
# card carries the MAX_diso, MAX_dano and DATA_cutoff settings.
################################################################
revise hklin \$mtzfile hklout \${tempfile}revised.mtz << eof
TITLE  revised \$mtzfile

# read in Fs:
@\${tempfile}LABIN

LABO FM=FM SIGFM=SIGFM


# read in WAVE cards
@\${tempfile}WAVE

EXCL RISO \$MAX_diso RANO \$MAX_dano SIGM \$DATA_cutoff

END
eof

# strip off everything but FM
# (any previous output file is removed first, so the existence test
# below can only succeed on a file written by this run)
rm -f \$outfile >& /dev/null
cad hklin1 \${tempfile}revised.mtz hklout \$outfile << EOF
LABIN FILE 1 E1=FM E2=SIGFM
#LABOUT ALLIN
EOF
# report success only if cad exited cleanly and the file is there
if((! \$status)&&(-e \$outfile)) then
    echo "\$outfile is ready."
    echo "calculated from \$mtzfile using:"
    echo 'dataset wavelength  f'"'"'     f"   '
    cat \${tempfile}LABIN \${tempfile}WAVE |\\
    nawk '/^FPH/{split(\$1,w,"="); i=substr(\$1,4)+0; F[i]=w[2]} \\
    /^WAVE/{printf "%7s  %7.4fA  %s %s\\n",F[\$2],\$4,\$6,\$8}'
endif

# clean up a bit
rm -f \${tempfile}WAVE >& /dev/null
rm -f \${tempfile}LABIN >& /dev/null
rm -f \${tempfile}revised.mtz >& /dev/null

Shelx_format:
################################################################
# optional step: skipped when shelxfile has been set to an empty string
if("\$shelxfile" == "") goto Patterson

# dump FM out in shelx format too.
mtz2various HKLIN \$outfile HKLOUT \$shelxfile << EOF-shelx
OUTPUT SHELX
FSQUARED
LABIN FP=FM SIGFP=SIGFM
END
EOF-shelx

# announce the file only if the conversion exited cleanly and wrote it
if((! \$status)&&(-e "\$shelxfile")) echo "\$shelxfile is ready."


Patterson:
################################################################
# optional step: skipped when pattfile has been set to an empty string
if("\$pattfile" == "") goto Clean_up

# use scaleit to get the recommended DIFF
# (FM is compared against itself; only the last word of the log line
# mentioning acceptable differences is kept)
scaleit hklin \$outfile << EOF >! \${tempfile}scaleit
analyze
labin FP=FM SIGFP=SIGFM FPH1=FM SIGFPH1=SIGFM
SCALE FPH1 0.000001
END
EOF
if(! \$?DIFF) set DIFF
set DIFF = \`nawk '/acceptable differences/{print \$NF}' \${tempfile}scaleit\`
rm -f \${tempfile}scaleit >& /dev/null
# fall back to a fixed limit when scaleit reported nothing
if("\$DIFF" == "") set DIFF = 1000

# calculate the Patterson map (if desired)
fft HKLIN \$outfile MAPOUT \${tempfile}FM.map << EOF-fft
PATTERSON
RESO \$loRES \$hiRES
LABIN F1=FM SIG1=SIGFM F2=FM SIG2=SIGFM
SCALE F2 0.000001 0
EXCLUDE DIFF \$DIFF
EXCLUDE SIG1 0
EOF-fft
# pick peaks from the map; the resulting pdb file is a scratch file
# that is removed again in the Clean_up section
peakmax MAPIN \${tempfile}FM.map XYZOUT \${tempfile}.pdb << EOF-pick
THRESHOLD RMS 3
NUMPEAKS 50
EOF-pick
# extend it to whole unit cell?
# (as written, mapmask only rescales the map; the xyzlim card is disabled)
mapmask mapin \${tempfile}FM.map mapout "\$pattfile" << EOF
scale sigma
#xyzlim 0 1 0 1 0 1
EOF
if((! \$status)&&(-e "\$pattfile")) echo "\$pattfile is ready."
rm -f \${tempfile}FM.map >& /dev/null


Clean_up:
################################################################
# clean up
# (removes the peak list written by peakmax; the other scratch files
# were already deleted by the sections that created them)


rm -f \${tempfile}.pdb   >& /dev/null


# normal end of the main procedure; Setup below is only reached by goto
exit






Setup:
#################################################

  ####   ######   #####  #    #  #####
 #       #          #    #    #  #    #
  ####   #####      #    #    #  #    #
      #  #          #    #    #  #####
 #    #  #          #    #    #  #
  ####   ######     #     ####   #

#################################################
#
#   gather information on:
#    mtz file
#    data sets
#    resolution limits
#    sigma cutoff (for map generation)
#    difference cutoff
#
##################################################
# make multiword variables out of these
# (the defaults at the top are single strings; setting them again inside
# parentheses splits each one into a list of words)
set Fs		= ( \$Fs )
set wavelengths = ( \$wavelengths )
set fp          = ( \$fp )
set fpp         = ( \$fpp )

# scan the command line for files
# (first pass: picks up the mtz file name and the element symbol only)
foreach arg ( \$* )
    # warn about probable misspellings
    if("\$arg" =~ *.mtz) then
	if(! -e "\$arg") then
	    echo "WARNING: \$arg does not exist! "
	    continue
	endif
	set mtzfile = "\$arg"
    endif
    
    # a capital letter followed by one lower-case letter (a to y)
    if("\$arg" =~ [A-Z][a-y]) then
	# might as well consider this an element
	set temp = "\$arg"
	if(\$?CLIBD) then
	    # check the CCP4 atom database
	    # (temp ends up empty when the symbol is not listed there)
	    set temp = \`nawk -v arg=\$arg 'NF==1 && toupper(\$1)==toupper(arg){print; exit}' \$CLIBD/atomsf.lib\`
	endif
	if("\$temp" != "") set Ee = "\$arg"
	continue
    endif
end


# now all filenames have been initialized

if(! -e "\$mtzfile") goto Help

##################################################
# get crystal and dataset information from the mtz file
# (the full mtzdump listing is kept in a scratch file because the
# command-line pass further down reads it again)
echo "go" | mtzdump HKLIN \$mtzfile >! \${tempfile}mtzdump
set CELL  = \`nawk '/Cell Dimensions/{getline;getline;print}' \${tempfile}mtzdump\`
set SG    = \`nawk '/Space group/{print \$5}' \${tempfile}mtzdump\`
set SGnum = \` nawk '/Space group/{print \$NF+0}' \${tempfile}mtzdump \`
set SG = \` nawk -F "[\\047]" '/Space group/{print \$2}' \${tempfile}mtzdump \`
# the library lookup is only possible when CCP4 has defined CLIBD;
# without this guard csh aborts the script on the undefined variable,
# although the element check above treats CLIBD as optional
if(\$?CLIBD) then
    set SG = \` nawk -v num=\$SGnum '\$1==num && NF>5{print \$4}' \${CLIBD}/symop.lib \`
endif
set hiRES = \`nawk '/Resolution Range/{getline;getline;print \$6}' \${tempfile}mtzdump\`

# get column label names from the mtz file
# Stage 1 keeps the label of every column of type F, D or Q.
# Stage 2 numbers the data sets (a new one starts at every F column) and
#         turns each Q line into SF or SD after the column before it.
# Stage 3 prints one line per data set: number, F, sigma F, D, sigma D.
nawk 'NF>3' \${tempfile}mtzdump |\\
nawk '\$(NF-1)=="F"{print "F", \$NF}\\
      \$(NF-1)=="D"{print "D", \$NF}\\
      \$(NF-1)=="Q"{print "S", \$NF}' |\\
nawk '/^F/{++n} {printf "%s", \$1; \\
       if(\$1=="S") printf "%s", last;\\
printf " %d %s\\n",n, \$2; last=\$1}' |\\
nawk '\$1=="F"{F[\$2]=\$NF} \$1=="SF"{SF[\$2]=\$NF} \\
      \$1=="D"{D[\$2]=\$NF} \$1=="SD"{SD[\$2]=\$NF} \\
      END{for(i in F){\\
	print i, F[i], SF[i], D[i],SD[i];}}' |\\
sort -n >! \${tempfile}datasets


# check extent of available data
set temp = \`cat \${tempfile}datasets |& wc -l\`
if(\$temp < 2) then
    # this is useless, revise won't run. bail out now
    # (both scratch files are removed, since Help exits right away)
    rm -f \${tempfile}datasets >& /dev/null
    rm -f \${tempfile}mtzdump >& /dev/null
    echo "ERROR: no usable data in \${mtzfile}! "
    set mtzfile = ""
    goto Help
endif


# one last pass through command line
# allow user overrides of all internal variables
# Every word is examined together with its neighbours (clamped to the
# first and last argument), so a value and its unit may be given either
# glued together or as two separate words.
set i = 0
echo -n "" >! \${tempfile}userlabels
while( \$i < \$#argv )
    @ i = ( \$i + 1 )
    @ nexti = ( \$i + 1 )
    @ lasti = ( \$i - 1 )
    if(\$nexti > \$#argv) set nexti = \$#argv
    if(\$lasti < 1) set lasti = 1
    set arg = "\$argv[\$i]"
    
    # see if a dataset label was given
    # (the word must appear surrounded by blanks in the data set table)
    grep " \$arg " \${tempfile}datasets >& /dev/null
    if(! \$status) then
	if(\$?NO) then
	    # user doesn't want this label
	    # filter it out of the input files
	    # (the continue skips the unset further down, so the negation
	    # stays active for the following word as well)
	    egrep -v " \$arg " \${tempfile}datasets >! \${tempfile}
	    mv \${tempfile} \${tempfile}datasets
	    continue
	else
	    # must want only this label?
	    cat \${tempfile}mtzdump |\\
	    nawk -v label=\$arg 'NF>2 && \$NF==label{print \$NF}' |\\
	    cat >> \${tempfile}userlabels
	endif
    endif
    
    # only look at non-file words
    if(! -e "\$arg") then
	if("\$arg" =~ [0-9]*) then
	    # we have a number
	    if(("\$arg" =~ *A)||("\$argv[\$nexti]" == "A")) then
		# user-preferred resolution limits
		# (either a single limit, or two limits joined by a dash)
		set temp = \`echo "\$arg" | nawk 'BEGIN{FS="-"} \$1+0 > 0.1{print \$1+0} \$2+0 > 0.1{print \$2+0}'\`
		if(\$#temp != 1) then
		    # two limits: the larger number becomes loRES
		    set temp = \`echo \$temp | nawk '\$1>\$2{print \$1, \$2} \$2>\$1{print \$2, \$1}'\`
		    if(\$#temp == 2) then
			set loRES = "\$temp[1]"
			set hiRES = "\$temp[2]"
		    endif
		else
		    if("\$temp" != "") set hiRES = "\$temp"
		endif
	    endif
	    
	    # number followed by the word sigma: sigma cutoff
	    if(("\$arg" =~ *[Ss]igma)||("\$argv[\$nexti]" =~ [Ss]igma)) then
		set DATA_cutoff = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    
	    # number ending in, or preceded by, diff, diso or dano:
	    # difference cutoffs (diff sets both of them)
	    if(("\$arg" =~ *[Dd]iff)||("\$argv[\$lasti]" =~ [Dd]iff)) then
		set MAX_diso = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
		set MAX_dano = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    if(("\$arg" =~ *[Dd]iso)||("\$argv[\$lasti]" =~ [Dd]iso)) then
		set MAX_diso = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	    if(("\$arg" =~ *[Dd]ano)||("\$argv[\$lasti]" =~ [Dd]ano)) then
		set MAX_dano = \`echo "\$arg" | nawk '\$1+0 > 0{print \$1+0}'\`
	    endif
	endif
	
	# allow "NO" logic to carry through
	# (the flag is cleared by every word that is not an existing file,
	# and set again when that word is one of the negations below)
	unset NO
	if(("\$arg" == "no")||("\$arg" == "not")) set NO
	if(("\$arg" == "don't")||("\$arg" == "ignore")) set NO
	if("\$arg" == "except") set NO
    endif
end

# the header listing is not needed past this point
rm -f \${tempfile}mtzdump >& /dev/null

#######################################################
# see if user specified particular labels
# The awk program below reads the wanted labels first (one per line),
# then the data set table, and prints every data set that matches a
# label, renumbered by the position of that label in the list.
# NOTE(review): the break sits after the if statement, not inside it,
# so the inner loop always stops after column 2 and only the F label of
# a data set is ever compared.  Possibly the break was meant to apply
# only after a match - confirm before changing.
set temp = \`cat \${tempfile}userlabels | wc -l\`
if(\$temp != 0) then
    # turn the "user" labels into real label files
    cat \${tempfile}userlabels \${tempfile}datasets |\\
    nawk 'NF==1{++n; label[n]=\$NF}\\
          NF>1 {for(i in label) for(j=2;j<=NF;++j){\\
		if(label[i]==\$j) print i, \$2,\$3,\$4,\$5;break}}' |\\
    sort -n >! \${tempfile}
    # the filtered table always replaces the full one here, even if empty
    mv \${tempfile} \${tempfile}datasets >& /dev/null
endif
if((\$temp == 0)&&(\$#Fs != 0)) then
    # apply the labels from the top of the script?
    echo "\$Fs" | nawk '{for(i=1;i<=NF;++i) print \$i}' >! \${tempfile}userlabels
    cat \${tempfile}userlabels \${tempfile}datasets |\\
    nawk 'NF==1{++n; label[n]=\$NF}\\
          NF>1 {for(i in label) for(j=2;j<=NF;++j){\\
		if(label[i]==\$j) print i, \$2,\$3,\$4,\$5;break}}' |\\
    sort -n >! \${tempfile}
    # unlike above, the full table is kept when nothing matched
    set temp = \`cat \${tempfile} | wc -l\`
    if(\$temp != 0) mv \${tempfile} \${tempfile}datasets >& /dev/null    
endif


###############################################################
# create "LABIN" cards for revise for each dataset
# (one continuation line per data set, each assigning the four program
# labels FPHn, SIGFPHn, DPHn and SIGDPHn to the columns of data set n;
# every line but the last ends in a dash)
cat \${tempfile}datasets |\\
nawk '{print "FPH" \$1 "=" \$2, "SIGFPH" \$1 "=" \$3,\\
             "DPH" \$1 "=" \$4, "SIGDPH" \$1 "=" \$5;}' |\\
sort -n |\\
nawk 'BEGIN{printf "%s", "LABIN"} {printf " -\\n%s", \$0} END{print ""}' |\\
cat >! \${tempfile}LABIN


# convert the labin cards into an "Fs" variable
# (the F column labels, in data set order)
cat \${tempfile}LABIN |\\
nawk '/^FPH/{i=substr(\$1,4)+0; split(\$1,w,"="); print i, w[2]}' |\\
sort -n | nawk '{print \$NF}' |\\
cat >! \${tempfile}Fs
set Fs = \`cat \${tempfile}Fs\`
if(\$#Fs < 2) then
    echo "Sorry, revise needs at least two wavelengths."
    goto Help
endif

# make a table connecting Fs to wavelengths
# (written again, in a different format, by the Revise section)
echo "\$wavelengths" |\\
 nawk '{for(i=1;i<=NF;++i) print i,\$i,"lambda"}' |\\
cat >! \${tempfile}waves


# clean up
# (the LABIN and waves files stay; the main procedure removes them)
rm -f \${tempfile}Fs >& /dev/null
rm -f \${tempfile}datasets >& /dev/null
rm -f \${tempfile}userlabels >& /dev/null

goto Return_from_Setup


exit
####################################################

EOF-script
chmod a+x $SCRIPT



















###############################################################################
#
#	MTZ file re-indexer
#
###############################################################################
# Generate reindex.com.  Inside the unquoted here-document only nawk and
# USE_VRSET are expanded now; every backslash-escaped variable is left for
# the generated script to expand when it runs.
set SCRIPT = $SCRIPT_dir/reindex.com
cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#	reindex.com
#
#	simple interface to reindexing of merged or unmerged mtz data
#
#
# set this to wherever your awk program is
# (plain awk is used instead when the configured program cannot be run)
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

set mtzfile    = ""
set logfile    = "reindex.log"
set newmtzfile = "reindexed.mtz"
# prefix for every scratch file; ends in the process id of this run
set tempfile   = ./reindex_temp\$\$

set message    = ""
set newSG      = ""

# Setup (at the bottom of the script) comes back to Return_from_Setup
goto Setup
#
#	scan command line for mtz files, space groups, and preferences
#
Help:
cat << EOF-Help
usage: \$0 mtzfile.mtz [SG] [flip] [newmtz.mtz]

where:

mtzfile.mtz	- the mtz data you want to re-index. (merged or unmerged)
SG		- the new space group (P43 for example)
flip		- flip ambiguous axes of P4x P3x and P6x cells
newmtz.mtz	- name you want for the reindexed mtz (default: reindexed.mtz)

Example:    \$0 Fpp.mtz P43212 Fpp.P43212.mtz
Example:    \$0 Fpp.mtz flip

Note: to put the screw axis of P2221 on the shortest cell axis, say P2122
The space group in the new file will be P2221 again, but the cell
will be flipped around appropriately.  Same goes for P2212, or
P21221 and P22121 with P21212

EOF-Help
exit
#
# Main procedure.  Setup has decided by now on the input file, the new
# space group, the REINDEX card and whether the data are merged.
Return_from_Setup:

if(! -e "\$mtzfile") goto Help

# okay to proceed
if("\$message" != "") set message = " \$message"
echo "reindexing \$mtzfile to \$newmtzfile (in \${newSG}\${message})" | tee \$logfile

# Merged data: reindex, rewrite the space group record, sort again.
# Any failing step sets BAD, which is tested at the very end.
if(\$?MERGED) then
    # apply any "flip" directives with "reindex"
    echo "\$REINDEX" |\\
    reindex HKLIN \$mtzfile HKLOUT \${tempfile}reindexed.mtz >> \$logfile
    if(\$status) set BAD
    
    # just use mtzutils to change SG record in header
    echo "SYMM \$newSGnum" |\\
    mtzutils HKLIN \${tempfile}reindexed.mtz HKLOUT \${tempfile}newSG.mtz >> \$logfile
    if(\$status) set BAD
    
    rm -f \${tempfile}reindexed.mtz >& /dev/null
    
    # sort it again, just to be safe
    echo "H K L" |\\
    sortmtz HKLIN \${tempfile}newSG.mtz HKLOUT \$newmtzfile >> \$logfile
    if(\$status) set BAD
    
    rm -f \${tempfile}newSG.mtz >& /dev/null
endif
    
# Unmerged data: the new symmetry goes to reindex itself, along with
# the REINDEX card.
if(\$?UNMERGED) then
    reindex HKLIN \$mtzfile HKLOUT \${tempfile}reindexed.mtz << EOF-reindex >> \$logfile
    SYMM \$newSG
    \$REINDEX
EOF-reindex
    if(\$status) set BAD

    # sort again
    # (the text in front of VRSET below was filled in when this script
    # was generated; presumably it is empty or disables the card)
    sortmtz HKLIN \${tempfile}reindexed.mtz HKLOUT \$newmtzfile << EOF-sort >> \$logfile
${USE_VRSET}VRSET -9E+38
H K L M/ISYM BATCH I SIGI
EOF-sort
    if(\$status) set BAD
    
endif
# remove intermediate file
rm -f \${tempfile}reindexed.mtz >& /dev/null


if((-e "\$newmtzfile")&&(! \$?BAD)) then
    echo "\$newmtzfile is \$mtzfile in \${newSG}\${message}"
    # when the axes were permuted, show the cell of the new file
    if("\$axes" != "") then
	echo "head" | mtzdump hklin "\$newmtzfile" |\\
	nawk '/Cell Dimensions/{getline;getline;\\
	print "new cell:", \$1, \$2, \$3, \$4, \$5, \$6}'
    endif
else
    # something went wrong
    echo "FAILED!  examine \$logfile to see what went wrong."
endif

exit







Setup:
#############################################################
# check up on essentials
# (the CCP4 symmetry library is needed for every space group lookup)
if(! \$?CLIBD) then
    echo "Please set up CCP4, and then run \$0 again"
    goto Help
endif
if(! -e \$CLIBD/symop.lib) then
    echo "ERROR: no \$CLIBD/symop.lib"
    echo "Please set up CCP4, and then run \$0 again"
    goto Help
endif

# REINDEX: keyword line for the reindex program (empty: no change)
# axes:    requested axis order as three names (empty: leave as is)
# FLIP:    number of times the word flip was given
set REINDEX
set axes
set FLIP = 0

#first, get filenames from the command line
# The same loop also recognizes space group names and the word flip.
foreach arg ( \$* )
    if("\$arg" =~ *.mtz) then
	if("\$mtzfile" == "") then
	    # haven't initialized this yet
	    if(-e "\$arg") then
		# check to make sure it is readable
		# (the header dump is kept; later steps read it again)
		echo "HEAD" | mtzdump HKLIN \$arg >&! \${tempfile}mtzdump
		grep "Space group" \${tempfile}mtzdump >& /dev/null
		if(\$status) then
		    echo "WARNING: \$arg is not an MTZ file! "
		    continue
		endif
		set mtzfile = "\$arg"
	    else
		echo "WARNING: \$arg does not exist! "
		continue
	    endif
	else
	    # already have an input mtz, so assume this is output
	    set newmtzfile = "\$arg"
	    
	    # look for "hidden" space group in name? 
	    # (the name is cut into runs of space group characters; every
	    # run that starts like a space group is looked up in the
	    # library, and the last one found there wins)
	    echo \$newmtzfile |\\
	     nawk '{for(i=1;i<=length(\$0);++i){c=substr(\$0,i,1);\\
	       if(c ~ /[PpCcIiFfRrHh1-6]/){printf c}else{print ""}}}' |\\
	     nawk '\$1~/^[PpCcIiFfRrHh][1-6]/' >! \${tempfile}SGs
	    foreach sg ( \`tail -10 \${tempfile}SGs\` )
		set temp = \`nawk -v SG=\$sg '\$4 == SG && \$1 < 500 {print \$4}' \$CLIBD/symop.lib | head -1\`
		if("\$temp" != "") then
		    set newSG = "\$temp"
		endif
	    end
	    rm -f \${tempfile}SGs >& /dev/null
	endif
    endif

    # check for new space group
    if("\$arg" =~ [PpCcIiFfRrHh][1-6]*) then
	# check for SGs listed in library (but not the screwy ones)
	# (only library entries numbered below 500 are accepted)
	set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
	if(\$?CLIBD) then
	    set temp = \`nawk -v SG=\$temp '\$4 == SG && \$1 < 500 {print \$4}' \$CLIBD/symop.lib | head -1\`
	endif
	if("\$temp" != "") then
	    set newSG = "\$temp"
	endif
	
	set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
	# check for orthorhombic "pseudo-spacegroup" language
	# (each alias below selects a standard space group plus an axis
	# order, and then moves on to the next argument)
	if("\$temp" == P2221) then
	    # P2221 with screw along longest axis
	    set axes  = "a b c"
	    set newSG = "P2221"
	    continue
	endif
	if("\$temp" == P2212) then
	    # P2221 with screw along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P2221"
	    continue
	endif
	if("\$temp" == P2122) then
	    # P2221 with screw along shortest axis
	    set axes  = "c a b"
	    set newSG = "P2221"
	    continue
	endif
	
	if("\$temp" == P21212) then
	    # P21212 with non-screw along longest axis
	    set axes  = "a b c"
	    set newSG = "P21212"
	    continue
	endif
	if("\$temp" == P21221) then
	    # P21212 with non-screw along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P21212"
	    continue
	endif
	if("\$temp" == P22121) then
	    # P21212 with non-screw along shortest axis
	    set axes  = "c a b"
	    set newSG = "P21212"
	    continue
	endif
	    
	if("\$temp" == P112) then
	    # P2 with twofold along longest axis
	    #set axes  = "a b c"
	    set newSG = "P2"
	    continue
	endif
	if("\$temp" == P121) then
	    # P2 with twofold along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P2"
	    continue
	endif
	if("\$temp" == P211) then
	    # P2 with twofold along shortest axis
	    set axes  = "c a b"
	    set newSG = "P2"
	    continue
	endif
	    
	if("\$temp" == P1121) then
	    # P21 with twofold along longest axis
	    #set axes  = "a b c"
	    set newSG = "P21"
	    continue
	endif
	if("\$temp" == P1211) then
	    # P21 with twofold along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P21"
	    continue
	endif
	if("\$temp" == P2111) then
	    # P21 with twofold along shortest axis
	    set axes  = "c a b"
	    set newSG = "P21"
	    continue
	endif
    endif
	
    # check for flipping of ambiguous axes
    # (the word may be repeated; the count selects between alternatives)
    if("\$arg" == "flip") then
	# user requested "flip" of axes
	@ FLIP = ( \$FLIP + 1 )
    endif
    
    # what about cubic? 
end

if(! -e "\$mtzfile") then
    # no usable input: a rejected file may have left its header dump
    # behind, and Help exits without any further clean up
    rm -f \${tempfile}mtzdump >& /dev/null
    goto Help
endif

# get info from the MTZ (should be left over from command-line check)
set CELL  = \`nawk '/Cell Dimensions/{getline;getline;print}' \${tempfile}mtzdump\`
set SGnum = \` nawk '/Space group/{print \$NF+0}' \${tempfile}mtzdump \`
# Look the current space group up by number in the symmetry library.
# One line is written: number, point group, lattice code, the sixth
# library field, and the space group name.  The lattice code is the
# lower-cased first letter of the sixth field (with two special cases
# for fields starting with t) followed by the first letter of the name.
cat \$CLIBD/symop.lib |\\
nawk -v SGnum=\$SGnum '\$1==SGnum{type=substr(tolower(\$6),1,1); key=substr(\$6,4,1);\\
    if((type=="t")&&(key=="G")) type="h"; if((type=="t")&&(key=="C")) type="a";\\
    print \$1, \$5, type substr(\$4,1,1), \$6, \$4;}' |\\
head -1 >! \${tempfile}sgdata
set SGnum = \`nawk '{print \$1}' \${tempfile}sgdata\`
set SG    = \`nawk '{print \$5}' \${tempfile}sgdata\`
set PG    = \`nawk '{print \$2}' \${tempfile}sgdata\`
set BRAV  = \`nawk '{print \$3}' \${tempfile}sgdata\`
set LATT  = \`nawk '{print \$4}' \${tempfile}sgdata\`

# same lookup for the requested space group, this time by name;
# without a request the current space group is kept
if("\$newSG" == "") set newSG = "\$SG"
cat \$CLIBD/symop.lib |\\
nawk -v SG=\$newSG '\$4==SG{type=substr(tolower(\$6),1,1); key=substr(\$6,4,1);\\
    if((type=="t")&&(key=="G")) type="h"; if((type=="t")&&(key=="C")) type="a";\\
    print \$1, \$5, type substr(\$4,1,1), \$6;}' |\\
head -1 >! \${tempfile}sgdata
set newSGnum = \`nawk '{print \$1}' \${tempfile}sgdata\`
set newPG    = \`nawk '{print \$2}' \${tempfile}sgdata\`
set newBRAV  = \`nawk '{print \$3}' \${tempfile}sgdata\`
set newLATT  = \`nawk '{print \$4}' \${tempfile}sgdata\`
rm -f \${tempfile}sgdata >& /dev/null

# decide how/if to flip
# (the choice depends on the space group currently in the file, not on
# the requested one; later matches overwrite earlier ones)
if(\$FLIP) then
    if(("\$SG" =~ [IP][46]*)||("\$SG" =~ P3*12)||("\$SG" =~ [PIF]2*3)) then
	set REINDEX = "reindex k, h, -l"
	set message = "with a and b axes flipped"
    endif
    if(("\$SG" == R32)||("\$SG" =~ P3*21)) then
	set REINDEX = "reindex -h, -k, l"
	set message = "with a and b axes inverted"
    endif
    if(("\$SG" =~ [PR]3)||("\$SG" =~ P3[12])) then
	# four possibilities here...
	# (selected by how many times flip was given)
	if(\$FLIP == 1) then
	    set REINDEX = "reindex k, h, -l"
	    set message = "with a and b axes flipped"
	endif
	if(\$FLIP == 2) then
	    set REINDEX = "reindex -h, -k, l"
	    set message = "with a and b axes inverted"
	endif
	if(\$FLIP >  2) then
	    set REINDEX = "reindex -k, -h, -l"
	    set message = "with a and b axes flipped and inverted"
	endif
    endif
    if("\$SG" == P1) then
	# permute the axes (from their canonical order)
	if(\$FLIP == 1) set axes = "a b c"
	if(\$FLIP == 2) set axes = "b c a"
	if(\$FLIP == 3) set axes = "c a b"
    endif
    if("\$PG" == PG2) then
	# flip the a and c axes?
	set REINDEX = "reindex l, -k, h"
	set message = "with a and c axes flipped"
    endif
    # for other space groups, "flip" is simply ignored
endif

# NOTE(review): newSG was already defaulted to the current space group
# before this point, so this test can only fire when that lookup came
# back empty as well - confirm whether a different test was intended.
if(("\$newSG" == "")&&(! \$FLIP)) then
    # WTF?
    echo "nothing to do! "
    rm -f \${tempfile}mtzdump >& /dev/null
    goto Help
endif

# now that whole command-line has been read, make some holistic decisions


# check to see if it's merged or unmerged
# (a header dump that mentions a number of batches is taken as unmerged
# data; exactly one of the two flags is set)
grep "Number of Batches" \${tempfile}mtzdump >& /dev/null
if(\$status) then
    set MERGED
else
    set UNMERGED
endif
# last use of the header dump
rm -f \${tempfile}mtzdump


# decide on new axis ordering (for asymmetric orthorhombics)
if("\$newSG" == "P222")    set axes = "a b c"
if("\$newSG" == "P212121") set axes = "a b c"
if("\$axes" != "") then
    # get current axis ordering
    # find out what the canonical one would be
    # then decide how to go from current ordering to the desired one
    #
    # The pipeline below works in these steps:
    #  1) tag the three cell lengths with the index they belong to now
    #  2) sort by length and name the axes a, b, c in that order
    #  3) put the requested axis name in front of each line
    #  4) sort on that name and print the old indices in the new order
    #  5) negate the third index unless the result is a cyclic
    #     permutation of h k l
    echo "\$CELL" | nawk '{\\
	# print out current axis order \\
	print \$1, "h"; print \$2, "k"; print \$3, "l"}' |\\
    sort -n |\\
    nawk '\\
	# add cannonical axis names\\
	NR==1{print \$0, "a"} NR==2{print \$0, "b"} NR==3{print \$0, "c"}' |\\
    nawk -v axes="\$axes" 'BEGIN{split(axes, abc)} {\\
	# write desired axis ordering in front of cannonical one \\
	print abc[NR], \$0}' |\\
    sort |\\
    nawk '# print out new hkl order \\
          {printf "%s ", \$3} END{print ""}' |\\
    nawk '\$1 \$2 \$3 \$1 \$2 \$3 !~ /hkl/{\$3 = "-" \$3} {print \$1, \$2, \$3}' |\\
    cat >! \${tempfile}order    
    set REINDEX = "reindex "\`nawk '{print \$1 ",", \$2 ",", \$3}'  \${tempfile}order\`

    # this should give us a mapping between any two orthorhombics
    
    # the last entry of the new order is quoted in the progress message
    set temp = \`nawk '{print \$NF}' \${tempfile}order\`
    rm -f \${tempfile}order
    if("\$newSG" == "P2221") then
	set message = "with screw along \$temp axis"
    endif
    if("\$newSG" == "P21212") then
	set message = "with non-screw along \$temp axis"
    endif
    if("\$newSG" == "P2") then
	set message = "with twofold along \$temp axis"
    endif
    if("\$newSG" == "P21") then
	set message = "with twofold screw along \$temp axis"
    endif
    if("\$newSG" == "P1") then
	set message = "with axes in \$axes order."
    endif
endif


# known ways to convert between bravais lattices
# (OKAY, once set, suppresses the warning for merged data further down)
if(("\$newBRAV" == "oC")&&("\$BRAV" =~ h[PR])) then
    # this lattice happens to fit, even if it's merged
    set REINDEX = "reindex h+k, k-h, l"
    set message = "with old hkl -> h+k,k-h,l"
    set OKAY
endif
# change_type is non-empty when the first letters of the old and the new
# space group name differ
set change_type = \`echo \$SG \$newSG | nawk 'substr(\$1,1,1) != substr(\$2,1,1)'\`
# cubic to just about anything should be okay
if(("\$change_type" == "")&&("\$BRAV" =~ c*)&&("\$newBRAV" !~ h*)) set OKAY
# tetragonal to anything below it should be okay
if(("\$change_type" == "")&&("\$BRAV" =~ t*)&&("\$newBRAV" !~ h*)&&("\$newBRAV" !~ c*)) set OKAY
# orthorhombic to anything below it is okay
# (no test is implemented for this case)

# anything can be converted to P1
if("\$newBRAV" == aP) set OKAY

# now check for unhandlable situations


# merged data with a different point group and no known conversion:
# explain the problem, but carry on regardless (the exit is disabled)
if((\$?MERGED)&&("\$newPG" != "\$PG")&&("\$PG" != "")&&(! \$?OKAY)) then
    # can't do this
    echo "WARNING: You Shouldn't reindex \$SG to \$newSG for a merged mtz! "
    echo ""
    if(\$SGnum < \$newSGnum) then
	cat << EOF
Since \$newSG has higher symmetry than \$SG, we would have
to merge some "unique" spots in \$mtzfile together.  Although
this is technically possible, it is definitely not advisable.
If your space group really is \$newSG, you will probably get
much better scaling and mergeing performance with the new
symmetry imposed during data processing, and probably in
data reduction as well.

EOF
    else
	cat << EOF
Since \$SG has higher symmetry than \$newSG, we would have
to "unmerge" the unique HKLs in \$mtzfile to form multiple, 
yet completely identical Fs.  So, all this will do is
make your mtz bigger, so we will just change the spacegroup
name.

If you think you "overmerged" your data in \$SG, then you
should go back to your mergeing step, and change \$SG to
\$newSG there.

EOF
    endif
#    exit
endif

if(\$?MERGED) goto Return_from_Setup
# now we are dealing with unmerged data

# what do we do about THIS! 
# (same first-letter comparison as change_type above)
set temp = \`echo "\$newSG \$SG" | nawk 'substr(\$1,1,1) != substr(\$2,1,1)'\`
if("\$temp" != "") then
    # crystal system has changed!
    # (both branches are placeholders; only the warning is printed)
    if(\$newSGnum < \$SGnum) then
	# we are going to lose some data
	
    else
	
    endif
    echo "WARNING: changing the lattice symmetry "
endif

goto Return_from_Setup
EOF-script
chmod a+x $SCRIPT



















###############################################################################
#
#	MTZ file summarizer
#
###############################################################################
# Generate mtz_sum.com, which prints completeness and <F>/<sig> for every
# amplitude column of an mtz file.  Only nawk is expanded at generation time.
# NOTE(review): the generated script runs under csh -e, which exits on any
# failing command, so the awk fallback after the nawk probe looks unreachable
# when the probe fails - confirm before relying on it.
set SCRIPT = $SCRIPT_dir/mtz_sum.com
cat << EOF-script >! $SCRIPT
#! /bin/csh -fe
#
#	Utility script for quick, tabular summary of a merged MTZ file
#
#	usage: mtz_sum.com mtzfile.mtz [resolution]
#
###############################################################################
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
# a word that looks like a decimal number is taken as the high resolution
# limit; the last existing file ending in .mtz is the input
set reso = ""
set mtzfile = ""
foreach arg ( \$* )
    if("\$arg" =~ [0-8].[0-9]*) set reso = "reso 100 \$arg"
    if(("\$arg" =~ *.mtz)&&(-e "\$arg")) set mtzfile = "\$arg"
end
if("\$mtzfile" == "") then
    echo "usage \$0 mtzfile.mtz [resolution]"
    # nothing to summarize: stop here; mtzdump must not run without input
    exit 2
endif
#
# one overall resolution bin; every F column line prints its label and
# completeness, and the line of the matching SIG column adds <F>/<sig>
echo "STATS NBIN 1 \$reso" | mtzdump hklin \$mtzfile |\\
 nawk '\$11 == "F"{printf \$NF ":\\t%6.2f%% complete,",\$6; F=\$7; lab=\$NF}\\
  \$12 == "SIG"lab {if((\$7+0)){printf "     <F>/<sig> = %6.2f\\n", F/\$7}else{print ""}}'

EOF-script
chmod a+x $SCRIPT

###############################################################################
#
#	scaleit logfile summarizer
#
###############################################################################
# Generate scaleit_sum.com in several pieces: two here-documents carry the
# fixed text, and between them the outer script appends one awk assignment
# per wavelength name, so the name-to-energy table is hard-coded into the
# generated awk programs.
set SCRIPT = $SCRIPT_dir/scaleit_sum.com
cat << EOF-script >! $SCRIPT
#! /bin/csh -fe
#
#	Utility script for quick, tabular summary of Diso and Dano
#
#	The first argument is the log file to summarize.
#
###############################################################################
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk

# table header; the octal escape 044 is presumably printed as a dollar sign
echo " \\044TABLE : Diso and Dano:"
echo " \\044GRAPHS:Diso and Dano vs energy:A:1, 2, 3:"
echo " :Diso vs energy:A:1, 2:"
echo " :Dano vs energy:A:1, 3:"
echo "\\044\\044"
echo "energy  Diso  Dano   \\044\\044"
echo "\\044\\044"

cat  \$1 |\\
    nawk 'BEGIN{\
EOF-script

# hard-code wavelengths with their names
# (the first half of the words are names, the second half wavelengths;
# each wavelength is converted to an energy as 12398.4245/wavelength, and
# the octal escapes 042 and 134 print a double quote and a backslash)
echo "$wavenames $wavelengths" |\
nawk '{for(i=1;i<=NF/2;++i){E=12398.4245/$(i+NF/2); \
       print "    E[\042" $i "\042] = " E " ; \134"}}' >> $SCRIPT

cat << EOF-script >> $SCRIPT
    }\\
    \$3 == "FPH="{++i;name[i] = \$4;}\\
    /TOTALS/{++j; diso[j]=substr(\$0,70,8)+0; dano[j]=substr(\$0,100,8)+0;\\
    printf "%.1f %5.1f %5.1f\\n", E[name[j]], diso[j], dano[j]}'

# end of the first table
echo "\\044\\044"



# second table: three R factors per data set, read from fixed columns
# of the same TOTALS lines and multiplied by 100
echo " \\044TABLE : Risos:"
echo " \\044GRAPHS:Risos vs energy:A:1, 2, 3, 4:"
echo " :RFAC vs energy:A:1, 2:"
echo " :RF_I vs energy:A:1, 3:"
echo " :Wted_R vs energy:A:1, 4:"
echo "\\044\\044"
echo "energy  RFAC RF_I Wted_R   \\044\\044"
echo "\\044\\044"

cat  \$1 |\\
    nawk 'BEGIN{\\
EOF-script
# same hard-coded name-to-energy table for the second awk program
echo "$wavenames $wavelengths" |\
nawk '{for(i=1;i<=NF/2;++i){E=12398.4245/$(i+NF/2); \
       print "    E[\042" $i "\042] = " E " ; \134"}}' >> $SCRIPT
cat << EOF-script >> $SCRIPT
    }\\
    \$3 == "FPH="{++i;name[i] = \$4;}\\
    /TOTALS/{++j; RFAC[j]=substr(\$0,63,7)*100;\\
                  RF_I[j]=substr(\$0,56,7)*100;\\
                  Wted_R[j]=substr(\$0,49,7)*100;\\
    printf "%.1f %5.1f %5.1f %5.1f\\n", E[name[j]], RFAC[j], RF_I[j], Wted_R[j]}'

# end of the second table
echo "\\044\\044"



EOF-script
chmod a+x $SCRIPT

###############################################################################
#
#	Scala logfile summarizer
#
###############################################################################
# Generate scala_summary.com, a csh utility that prints one summary row per
# scala log file named on its command line.
#
# For each log the generated script prints:
#   - a label: the last non-empty piece of the log basename after splitting
#     it on "_", "." and "-"
#   - three numbers taken from the last "Overall:" line (fields 2 and 5, and
#     the text starting at the column where "Mn" appeared on the "Dmin" line)
#   - fields 5 and 7 of the last line of the "Completeness v Resolution" section
#   - field 8 of the last line containing "squares straight"
#
# The generated script runs under "csh -f" rather than "csh -fe": with -e
# the shell exits as soon as the nawk probe fails, so the fallback to plain
# awk on the line after the probe could never take effect.
set SCRIPT = $SCRIPT_dir/scala_summary.com
cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#  Automatically generated script for sumarizing scala logs
#  in nice, tabular format
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk

echo "wavelength Rmerge  Ranom I/sigma Complete Mult Wilson B"
foreach log ( \$* )
    basename \$log .log | nawk 'BEGIN{RS="_"}{print}' |\
     nawk 'BEGIN{RS="."}{print}' | nawk 'BEGIN{RS="-"}{print}' |\
     nawk 'NF != 0' | tail -1 |\
     nawk '{printf "%10s ", \$NF}'
    nawk '/Dmin/{idx=index(\$0,"Mn")}\
	/Overall:/{printf "%6.3f %6.3f %7.3f\n", \$2, \$5, substr(\$0,idx)+0}' \
    \$log | tail -1 | nawk '{printf "%s", \$0}'
    nawk '/Completeness v Resolution/,/Overall/{print}' \$log |\
    tail -1 | nawk '{printf " %7.1f%% %4.1f", \$5, \$7}'
    nawk '/squares straight/{printf " %8.3f\n", \$8}' \
    \$log | tail -1 | nawk '{printf "%s", \$0}'
    echo ""
end
EOF-script
chmod a+x $SCRIPT





###############################################################################
#
#	AutoScala   - optimizes SDCORR card in a SCALA merging script
#
###############################################################################
# Generate "autoscala", an awk program (written through the here-document
# that starts below and runs to the matching EOF-script terminator).
# The here-document is unquoted, so $nawk on the first generated line is
# expanded now and becomes the interpreter of the generated file.
# NOTE(review): a "#!" line normally needs a full path; presumably $nawk
# holds one at this point - confirm where $nawk is set.
set SCRIPT = $SCRIPT_dir/autoscala
cat << EOF-script >! $SCRIPT
#! $nawk -f
BEGIN {
#
#
#	Automatically adjusts SDCORR card in SCALA for "optimum" statistics
#	as given by agrovata.
#
#	The provided script MUST generate mergeing statistics (agrovata or scala).
#	The following scripts must be creatable.
#
#	test_script = "./" FILENAME "_test"
#	best_script = "./" FILENAME "_best"
#
#	The latter will continuously be updated to the best SDCORR line
#	found so far.
#
#
#
#
#
#
#
	# these are convergence criteria
	# specify the number of decimal places you want to refine
	# each variable to.
	sdfac_decimals = 2
	sdprime_decimals = 0
	sdadd_decimals = 2

	# defaults for Golden Section search
	# go ahead and modify these if you're 
	# SURE the best value is bracketed
	maximum_sdprime = 15
	maximum_sdadd   = 0.1

	minimum_sdprime = 0
	minimum_sdadd   = 0
	
	# alternately, you may specify "tune=factor"

	# starting score; a run replaces it when its RMSD is smaller
	best_so_far = 1000000
	# index of the most recently stored script line
	line = 0
	# debug keeps a value assigned on the command line, else becomes 0
	if(!debug) debug = 0
}
# Finish initialization on the first input record (original note: this is
# done here, not in BEGIN, for linux awk).
# Names already assigned (e.g. on the command line) are left alone;
# otherwise they are built from FILENAME, or from fixed defaults when
# FILENAME is empty.
NR==1{
    if(FILENAME)
    {
	if(!test_script) test_script = "./" FILENAME "_test"
	if(!best_script) best_script = "./" FILENAME "_best"
	if(!test_log)    test_log    = "./" FILENAME ".log"
    }

    # still unset: fall back to fixed names
    if(!test_script) test_script = "./scala.com_test"
    if(!best_script) best_script = "./scala.com_best"
    if(!test_log)    test_log    = "./scala.com.log"
}

########################################################################
#       Analyze the supplied script & copy it into memory              #
########################################################################

# Keep every input line, untouched, in script[1..number_of_lines];
# afterwards lower-case the current record so that the rules that follow
# can match keywords regardless of case.
{
    number_of_lines++;
    script[++line] = \$0;

    \$0 = tolower(\$0);
}

# Pick up the starting values from the SDCORR card and remember which
# stored line holds it (WriteScript rewrites that line later).
#   four fields:  SDCORR sdfac sdprime sdadd
#   otherwise:    SDCORR sdfac sdadd         (sdprime is taken as 0)
/^sdcorr/ {
    sdcorr_line = line;

    sdfac   = \$2;
    sdprime = 0;
    sdadd   = \$3;

    if(NF == 4)
    {
	# sdprime was given explicitly
	sdprime = \$3;
	sdadd   = \$4;
    }
}

END {
########################################################################
#	Now begins the "real" program                                  #
########################################################################
#
#	Outline:
#	  1. in tune mode, narrow the search limits around the values
#	     found on the SDCORR card
#	  2. loop: write the trial script, score it (or recall the score
#	     of an identical card), remember the best card, then let
#	     NextSDadd / NextSDprime choose the next trial values
#	  3. remove the trial script, rewrite the best script and print
#	     a summary (unless summary=no was given)
#


    # user may specify "tuning mode"
    if(tune > 1)
    {
	if(sdprime)
	{
	    maximum_sdprime = sdprime * tune;
	    minimum_sdprime = sdprime / tune;
	}

	if(sdadd)
	{
	    maximum_sdadd = sdadd * tune;
	    minimum_sdadd = sdadd / tune;
	}
    }

    # signal to optimize ...
    max_sdfac = max_sdprime = max_sdadd = "restart"
    done = 0;
    
    while(!done)
    {
	# write the script
	WriteScript(test_script);

	# scores are cached in value[], keyed by the full SDCORR card text
	if(value[script[sdcorr_line]] != 0)
	{
	    # we have already calculated this value
	    
	    RMSD_sigma = value[script[sdcorr_line]]
	    if(debug>1) print  "recall: " script[sdcorr_line] "\\t\\tRMSD(sigma): " RMSD_sigma*100 "\\tbest so far: " best_so_far*100;
	}
	else
	{
	    # we have not run this pair before
	    
	    # run it, and filter output
	    printf  "trying: %s ", script[sdcorr_line] sdcorr_pad;
	    GetResults(test_script);
	
	    # remember all values obtained (to avoid repeats)
	    value[script[sdcorr_line]] = RMSD_sigma;

	    # print out rating of this run
	    printf "RMS(scatter/sigma -1): %8.5f    ", RMSD_sigma;

	    # update absolute best result
	    if(RMSD_sigma < best_so_far) 
	    {
		best_so_far = RMSD_sigma;
		best_sdcorr = script[sdcorr_line];

		# write out best scripts immediately
		WriteScript(best_script);

		# print out the values used, and their effect
		printf "best so far: %8.5f ", best_so_far;
	    }
	    # finish the line
	    print "";
	}

	# pick next values based on last run...
	if(NextSDadd()) 
	{
	    # ...has converged
	    
	    # "inner" loop done, so pick next sdprime value
	    if(NextSDprime()) 
	    {
		# ...has converged
		
		# both have converged, so we are done
		done = "true";
	    }
	    # reoptimize sdadd with new sdprime
	    max_sdadd = "restart";
	}
    }
    
    # Main loop has exited.
    # parameters have converged, so finish up


    # update scala's output files to best values
    #GetResults(best_script);

    
    # update variables for output
    # best[1..4] = "SDCORR", sdfac, sdprime, sdadd of the best card
    split(best_sdcorr, best);

    # Prefer the SDfac reported by the runs themselves.  When no run
    # reported one, best_sdfac is still unset; formatting it would put
    # "0.00" on the final card, so keep the sdfac of the best card instead.
    if(best_sdfac+0 > 0)
    {
	sdfac = sprintf("%." sdfac_decimals "f", best_sdfac);
    }
    else
    {
	sdfac = best[2];
    }
    sdprime = best[3];
    sdadd   = best[4];    
    
    # get rid of "test" script
    system("rm -f " test_script)
    # update the "best" script
    WriteScript(best_script);
    
    if(summary != "no")	    # option for cleaner output
    {
	print  "\\n\\nSUMMARY:\\n"
	printf("%s ==> RMSD = %.4f\\n", best_sdcorr, value[best_sdcorr]);
	print  "\\n"
	print  "****************************************"
	print  "**      BEST CARD FOUND               **"
	printf "**      %21s         **\\n", script[sdcorr_line];
	print  "****************************************"
	print  "\\n"

	# tell user what to do next
	print "***********************"
	print "***   ALL DONE!!!   ***"
	print "***********************"

	print "Your new " FILENAME " can be found at: " best_script

    }

}
########################################################################
#	Functions used in this script                                  #
########################################################################


#########################################################################
#	NextSDprime()							#
#									#
#	Outer step of the search: stores the score of the sdprime	#
#	value just tried and asks GoldStep() for the next one.		#
#									#
#	reads:		RMSD_sigma, minimum_sdprime,			#
#			maximum_sdprime, sdprime_decimals		#
#									#
#	updates:	sdprime, best_sdprime, min_sdprime,		#
#			max_sdprime, sdprime_step, move,		#
#			doneness, value[]				#
#									#
#	returns:	"true" once the bracket is narrower than	#
#			0.1^sdprime_decimals, otherwise ""		#
#									#
#########################################################################
function NextSDprime()
{
    # work on shifted values (+100) so that the sdprime keys do not
    # collide with other keys stored in value[]
    sdprime      = sdprime + 100;
    best_sdprime = best_sdprime + 100;

    # score of the point that was just evaluated
    value[sdprime] = RMSD_sigma;

    # a fresh search: set up the bracket and give its ends a huge score
    if (max_sdprime == "restart") 
    {
	min_sdprime = minimum_sdprime +100;
	value[max_sdprime] = 100000;

	best_sdprime = min_sdprime;
	value[best_sdprime] = 100000;

	max_sdprime = maximum_sdprime +100;
	value[max_sdprime] = 100000;
    }

    # one Golden Section step; GoldStep hands the new bracket back
    # through the Gold_* globals
    sdprime      = GoldStep(min_sdprime, sdprime, best_sdprime, max_sdprime);
    min_sdprime  = Gold_min;
    max_sdprime  = Gold_max;
    best_sdprime = Gold_best;

    # bookkeeping only: size of the step and number of moves so far
    sdprime_step = sdprime - best_sdprime;
    move++;

    # converged when the bracket (still in shifted values) is narrow enough
    doneness = (max_sdprime - min_sdprime < 0.1^sdprime_decimals) ? "true" : "";

    # undo the shift on the values used outside this function
    sdprime      = sdprime - 100;
    best_sdprime = best_sdprime - 100;

    return doneness;
}

#########################################################################
#	NextSDadd()							#
#									#
#	Inner step of the search: stores the score of the sdadd		#
#	value just tried and asks GoldStep() for the next one.		#
#									#
#	reads:		RMSD_sigma, minimum_sdadd, maximum_sdadd,	#
#			min_sdadd_step, sdadd_decimals			#
#									#
#	updates:	sdadd, best_sdadd, min_sdadd, max_sdadd,	#
#			sdadd_step, move, doneness, value[]		#
#									#
#	returns:	"true" once the bracket is narrower than	#
#			0.1^sdadd_decimals, otherwise ""		#
#									#
#########################################################################
function NextSDadd()
{
    # score of the point that was just evaluated
    value[sdadd] = RMSD_sigma;

    # a fresh search: set up the bracket and give its ends a huge score
    if (max_sdadd == "restart") 
    {
	min_sdadd = minimum_sdadd;
	value[max_sdadd] = 100000;

	best_sdadd = min_sdadd - min_sdadd_step/10;
	value[best_sdadd] = 100000;

	max_sdadd = maximum_sdadd;
	value[max_sdadd] = 100000;
    }

    # one Golden Section step; GoldStep hands the new bracket back
    # through the Gold_* globals
    sdadd      = GoldStep(min_sdadd, sdadd, best_sdadd, max_sdadd);
    min_sdadd  = Gold_min;
    max_sdadd  = Gold_max;
    best_sdadd = Gold_best;

    # bookkeeping only: size of the step and number of moves so far
    sdadd_step = sdadd - best_sdadd;
    move++;

    # converged when the bracket is narrow enough
    doneness = (max_sdadd - min_sdadd < 0.1^sdadd_decimals) ? "true" : "";

    return doneness;
}





#########################################################################
#	WriteScript(filename)						#
#									#
#	Writes the lines held in script[] to the file "filename"	#
#	and makes that file executable for its owner.			#
#		The stored SDCORR line is first rebuilt from the	#
#	current trial values (sdprime and sdadd pass through round()).	#
#									#
#	reads:		script[], number_of_lines, sdcorr_line,		#
#			sdfac, sdprime, sdadd,				#
#			sdprime_decimals, sdadd_decimals		#
#									#
#	updates:	script[sdcorr_line], sdcorr_pad, and the	#
#			globals "line" and "i" used as counters		#
#									#
#########################################################################
function WriteScript( filename )
{
    line = 1;
    while(line <= number_of_lines)
    {
	if(line == sdcorr_line)
	{
	    # rebuild the card from the current trial values
	    script[line] = "SDCORR " sdfac " " round(sdprime, sdprime_decimals) " " round(sdadd, sdadd_decimals);

	    # blanks that pad the card out to 25 characters on screen
	    sdcorr_pad = "";
	    i = 0;
	    while(i < 25 - length(script[line]))
	    {
		sdcorr_pad = sdcorr_pad " ";
		++i;
	    }
	}

	print script[line] > filename;
	++line;
    }

    # flush the file before anything tries to run it
    close(filename);

    # make it runnable
    system("chmod u+x " filename);
}


#########################################################################
#       GetResults(filename)                                            #
#                                                                       #
#       Runs the script given as "filename" and analyzes the output     #
#       for the agrovata Sigma(scatter/SD) graph.                       #
#               The RMS deviation from an even distribution of          #
#       observations per intensity bin is computed as RMSD_width.       #
#               The RMS deviation from 1.0 of the scatter/SD is         #
#       also computed; the count-weighted form ends up in RMSD_sigma.   #
#                                                                       #
#       Parser state (bins, graph, sdfac_list) is cleared on entry so   #
#       that a run which prints no graph is reported as an error        #
#       instead of silently reusing the numbers of the previous run.    #
#       The SDfac average (sdfacs, sdfac_sum) keeps accumulating        #
#       over all calls, as before.                                      #
#                                                                       #
#       modifies:       RMSD_width                                      #
#                       RMSD_width_p                                    #
#                       RMSD_sigma                                      #
#                       RMSD_sigma_p                                    #
#                       RMSD_sigma_w                                    #
#                       best_sdfac                                      #
#                                                                       #
#########################################################################
function GetResults ( filename )
{
    # report what we're doing
#    print "running " filename "..."

    # start from a clean slate: nothing parsed yet for this run
    bins = 0
    graph = 0
    sdfac_list = 0

    # analyze stdout from "agrovata" script
    command = filename
#    if(test_log) command = filename " | tee " test_log
    while(command | getline > 0)
    {
        # look for the graph entry
        if (\$0 ~ /^ \\\$GRAPHS: Sigma\\(scatter\\/SD\\)/)
	{
	    graph = 1
	    
	    # initialize cumulative variables
	    count_full["sum"]=count_part["sum"]=count["sum"]=0
	    bins = 0  
	}
	if (\$1 == "TOTALS:") graph = 0

        # inside the graph, data rows have no lower-case letters
        if (graph && (\$0 !~ /[a-z]/) && NF>3)
        {
            # we are reading a line of graph data
	    ++bins
	    
	    # gather stats (fixed columns, not fields)
#	    count_full[bins]  = \$5
#	    count_part[bins]  = \$9
	    count_full[bins]  = substr(\$0, 38, 9)+0
	    count_part[bins]  = substr(\$0, 77, 9)+0
	    count[bins]       = count_full[bins]+count_part[bins]

	    # sums
	    count_full["sum"] += count_full[bins]
	    count_part["sum"] += count_part[bins]
	         count["sum"] += count[bins]

	    # deviation of scatter/SD from 1.0
#	    scatt_full[bins]  = \$7-1
#	    scatt_part[bins]  = \$NF-1
	    scatt_full[bins]  = substr(\$0, 55, 8)-1
	    scatt_part[bins]  = \$NF-1
	    scatt[bins]       = (scatt_full[bins]+scatt_part[bins])/2
	}

        # look for refined SDfac
	if(\$0 ~ /Final assessment of SDcorrection multipliers/) { sdfac_list = 1 }
	if(\$0 ~ /Summary/) { sdfac_list = 0 }

	if(sdfac_list)
	{
	    if((\$0 ~ /[0-9]/)&&(\$0 !~ /[a-z]/))
	    {
		if(NF>2)
		{
		    # weight by the observation counts; skip the row when
		    # no counts are known yet (would divide by zero)
		    if(count["sum"] > 0)
		    {
			++sdfacs
			sdfac_sum+=(count_full["sum"]*\$2+count_part["sum"]*\$5)/(count["sum"])
		    }
		}
		else
		{
		    sdfac_list = 0
		}
	    }
	}
    }
    
    # close the pipe!
    close(command);

    # calculate mean SDFAC
    if(sdfacs) best_sdfac = sdfac_sum/sdfacs
#    printf best_sdfac

    # the scatt and count arrays should now contain the LAST graph in the log
    if(bins && count["sum"] > 0)
    {
	# calculate average values
	count_full["avg"] = count_full["sum"]/bins
	count_part["avg"] = count_part["sum"]/bins
	count["avg"]      = count["sum"]     /bins

	# and now rmsds
	count_full["rms"]=count_part["rms"]=count["rms"]=0
	scatt_full["rms"]=scatt_part["rms"]=scatt["rms"]=scatt["wrms"]=0
	for(i=1; i<=bins; ++i)
	{
	    count_full["rms"] += (count_full[i]-count_full["avg"])*(count_full[i]-count_full["avg"])
	    count_part["rms"] += (count_part[i]-count_part["avg"])*(count_part[i]-count_part["avg"])
	    count["rms"]      += (count[i]     -count["avg"]     )*(count[i]     -count["avg"]     )

	    scatt_full["rms"] += scatt_full[i]*scatt_full[i]
	    scatt_part["rms"] += scatt_part[i]*scatt_part[i]
	    scatt["rms"]      += scatt[i]*scatt[i]
	    scatt["wrms"]     += count_full[i]*scatt_full[i]*scatt_full[i]
	    scatt["wrms"]     += count_part[i]*scatt_part[i]*scatt_part[i]
	}
	
	count_full["rms"] = sqrt(count_full["rms"]/bins)
	count_part["rms"] = sqrt(count_part["rms"]/bins)
	count["rms"]      = sqrt(count["rms"]/bins)
	
	scatt_full["rms"] = sqrt(scatt_full["rms"]/bins)
	scatt_part["rms"] = sqrt(scatt_part["rms"]/bins)
	scatt["rms"]      = sqrt(scatt["rms"]/bins)
	scatt["wrms"]     = sqrt(scatt["wrms"]/count["sum"])
    }
    else
    {
	print "ERROR: no stats generated by " filename
	print "please edit " FILENAME " and make sure it runs by itself."
	exit
    }

    # update global variables
    RMSD_width   = count_full["rms"]
    RMSD_width_p = count_part["rms"]
    RMSD_sigma   = scatt_full["rms"]
    RMSD_sigma_p = scatt_part["rms"]
    RMSD_sigma_w = scatt["wrms"]

    # the count-weighted value is the score the optimizer uses
    RMSD_sigma = RMSD_sigma_w

}

#########################################################################
#	GoldStep(min, last, best, max)					#
#									#
# One Golden Section step.  Given the bracket (min, max), the best	#
# point so far and the point tried last, the bracket is shrunk using	#
# the scores stored in value[], and the next point to try is returned.	#
#									#
#	reads:		value[], debug					#
#	modifies:	Gold_min, Gold_max, Gold_best, Gold_next	#
#			(the new bracket is handed back this way)	#
#									#
#########################################################################

function GoldStep(min, last, best, max)
{
    # debugging...
    if(debug) printf "\\n" min " " best " " max " ==GoldStep==> "

    # throw away the part of the bracket that cannot hold the minimum
    if(last >= best && value[last] <= value[best])
    {
	# larger trial, and it was at least as good: look in (best, max)
	min = best;
	best = last;
    }
    else if(last >= best)
    {
	# larger trial, but worse: look in (min, last)
	max = last;
    }
    else if(value[last] <= value[best])
    {
	# smaller trial, and it was at least as good: look in (min, best)
	max = best;
	best = last;
    }
    else
    {
	# smaller trial, but worse: look in (last, max)
	min = last;
    }

    # step 0.38 of the way into the wider side of the bracket
    Gold_next = ((max - best) > (best - min)) ? best + 0.38*(max - best) : best - 0.38*(best - min);

    # hand the bracket back through globals
    Gold_best = best;
    Gold_max = max;
    Gold_min = min;

    # debugging...
    if (debug) printf min " " best " " max "\\t"

    return Gold_next;
}


# abs(number): magnitude of number, taken as the root of its square
function abs(number)
{
	return sqrt(number * number);
}

# round(number, sigdigs)
#   Returns number rounded to sigdigs decimal places, halves going away
#   from zero.  int() alone only truncates, so a value such as 0.29 (held
#   as 0.28999...) came back as 0.28 whenever a card was rewritten.
function round(number, sigdigs)
{
	if(number < 0) return int(number*10^sigdigs - 0.5)/10^sigdigs;
	return int(number*10^sigdigs + 0.5)/10^sigdigs;
}
EOF-script
chmod a+x $SCRIPT






























SortScript:
###############################################################################

  ####    ####   #####    #####
 #       #    #  #    #     #
  ####   #    #  #    #     #
      #  #    #  #####      #
 #    #  #    #  #   #      #
  ####    ####   #    #     #

###############################################################################
#
#	Reindex, rebatch, and sort all the data into one, big-ass file
#
###############################################################################
# Start writing sort_everything.com.  An existing copy is first moved to
# <script>.bak, unless FRUGAL is set.
# This first piece of the generated script holds its defaults and the jump
# to its Setup section; execution resumes at ReturnFromSetup.
# Inside the here-document, \$ gives a literal dollar sign (evaluated when
# the generated script runs); unescaped variables such as $SG, ${rawMTZ},
# $nawk and $SCRIPT are expanded now.
set SCRIPT = ${SCRIPT_dir}/sort_everything.com
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#   Sorting script made by Scaler Elves
#
# Defaults
set SG       = $SG
set outfile  = "${rawMTZ}"
set tempfile = "\${CCP4_SCR}/sort_temp\$\$"
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
goto Setup
############################
#
# usage: $SCRIPT [sg]
# where: [sg] is the new space group
#    eg: $SCRIPT $SG
#        means reindex all data to $SG (and sort it)
#
############################
ReturnFromSetup:
#####################################################
EOF-script

# cmt is put in front of the dataset-naming BATCH card written below:
# "#" by default, or nothing at all when RENAME_DATASETS is set.
set cmt = "#"
if($?RENAME_DATASETS) set cmt = ""

# create rebatch entries for mtzs
# $RUNFILE is scanned line by line:
#   - a line whose 2nd field is "wavelength" sets the current name (3rd field)
#   - a line whose 3rd field is "+" and which matches /.mtz$/ yields one
#     rebatch job: field 2 is the batch offset, fields 4 and 6 the first and
#     last batch, the last field the mtz file
# The awk program is single-quoted, so ${tempfile} is written out literally
# and is only expanded when the generated script runs.  Output files are
# numbered run1.mtz, run2.mtz, ... in order of appearance.
cat $RUNFILE |\
nawk -v cmt="$cmt" '$2 == "wavelength" {wave=$3}\
   $3 == "+" && /.mtz$/{++n; mtz=$NF; first=$4; last=$6; add=$2; {\
   print "\043 add " add " to " first " through " last " in " mtz; \
   print "rebatch HKLIN " mtz " HKLOUT ${tempfile}run" n ".mtz << EOF-rebatch";\
   print "BATCH " first " TO " last " INCLUDE "\
   print "BATCH " first " TO " last " ADD " add;\
   print cmt "BATCH " first " TO " last " pname data xname xtal dname " wave;\
   print "EOF-rebatch\n";}}' >> $SCRIPT


# create rotaprep entries for denzo files
# (only when DENZO_FILES is set)
# Here ${tempfile} without a backslash is the temp prefix of THIS script,
# while \${tempfile} in a here-document is left for the generated script.
if($?DENZO_FILES) then
    cat << EOF-scriptbit >> $SCRIPT

# prepare denzo files
echo "preparing denzo files"

rm -f \${tempfile}run*.york >& /dev/null

EOF-scriptbit
    
    # prepare denzo files
    # keep the converter next to the generated scripts and make it runnable
    if(-e ${tempfile}x2york.awk) mv ${tempfile}x2york.awk ${SCRIPT_dir}/x2york.awk
    chmod a+x ${SCRIPT_dir}/x2york.awk
    
    # create instructions to cat denzo runs together
    # Every strategy line ending in DENZO becomes
    #     <converter> <file> >> <tempfile>run<N>.york
    # with <file> the next-to-last field and N the 2nd field.  After
    # sorting and removing duplicates, a line that (re)creates the .york
    # file is inserted whenever the destination changes, and an echo of
    # the 2nd word is put in front of every line.
    # NOTE(review): the grouping relies on lines for one destination being
    # adjacent after "sort -u" - confirm for file names that interleave runs.
    cat ${tempfile}strategy |\
    nawk -v cvt=${SCRIPT_dir}/x2york.awk '$NF=="DENZO" {\
    printf "%s %s >> ${tempfile}run%d.york\n", cvt, $(NF-1), $2;}' |\
    sort -n | sort -u |\
    nawk '{if(lastrun != $NF) printf "\necho \"\" >! %s\n", $NF; print; lastrun=$NF}' |\
    nawk '{print "echo " $2; print}' |\
    cat >> $SCRIPT

    # now give instructions to rotaprep each run
    # (one rotaprep job per distinct run number found on the DENZO lines)
    foreach denzorun ( `nawk '/DENZO/{print $2}' ${tempfile}strategy | sort -u -n` )
	# retrieve batch add value
	# (3rd minus 6th field of the first strategy line for this run)
	set ADD = `nawk -v run=$denzorun 'run==$2{print $3-$6; exit}' ${tempfile}strategy`
	
	cat << EOF-denzopreprun >> $SCRIPT
	
rotaprep HKLIN \${tempfile}run${denzorun}.york HKLOUT \${tempfile}run${denzorun}.mtz << eof-rotaprep
INPUT DENZO
SYMMETRY \$SG
DETECTOR $XYrange
ADDBATCH $ADD
END
eof-rotaprep

# done with this file
rm -f \${tempfile}run${denzorun}.york

EOF-denzopreprun
    end
    
endif

# don't need this file anymore
rm -f ${tempfile}strategy


# Append the sortmtz job.  Its input (a here-document inside the generated
# script) is built in three pieces: the fixed cards, one input file name
# per run, and the closing terminator followed by the status check.
# ${USE_VRSET} is expanded now and sits directly in front of the VRSET card.
cat << EOF-script >> $SCRIPT
#####################################################

# sort all runs together into one, big MTZ
sortmtz HKLOUT \$outfile << EOF-sort
${USE_VRSET}VRSET -9E+38
H K L M/ISYM BATCH I SIGI
EOF-script
# one file name per "+" line of $RUNFILE, numbered in order of appearance
# (${tempfile} is single-quoted here, so it stays literal in the output)
# NOTE(review): the rebatch step numbers only the "+" lines matching
# /.mtz$/, while this counts every "+" line - confirm the numbering agrees
# when mtz and denzo runs are mixed.
nawk '$3 == "+"{++n; print "${tempfile}run" n ".mtz"}' $RUNFILE >> $SCRIPT
# close the sortmtz input, check its status, then compare the space group
# found in the sorted file with the requested one; REINDEX is set (empty)
# when they differ and it was not already set
cat << EOF-script >> $SCRIPT
EOF-sort
if(! \$status) then
    echo "data from all runs and all wavelengths are now in: \$outfile"
else
    echo "sorting failed!  Check for overlapping batch numbers."
    set BAD
    exit 9
endif

# now check to see if we need to reindex
set sg = \`echo "HEAD" | mtzdump hklin \$outfile | nawk '/Space group =/{print \$5}'\`
if(("\$sg" != "\$SG")&&(! \$?REINDEX)) then
    set REINDEX = ""
endif

#####################################################

# clean up
EOF-script
# one "rm -f" per "+" line of $RUNFILE, using the same numbering as above
nawk '$3 == "+"{++n; print "rm -f ${tempfile}run" n ".mtz"}' $RUNFILE >> $SCRIPT

# Append the rest of the generated script:
#   - the optional "reindex" section, entered only when REINDEX is set:
#     it runs reindex with the requested space group (plus the REINDEX card,
#     which may be empty) and sorts the result again
#   - the "Setup" section, which scans the command line for a space group.
#     A name found in symop.lib is used as is; otherwise a few
#     "pseudo-spacegroup" names (P2212, P2122, P21221, P22121, C2212, C2122)
#     are mapped to a standard space group plus a reindex card.
# Everything written as \$ is evaluated when the generated script runs;
# ${USE_VRSET} is expanded now.
cat << EOF-script >> $SCRIPT
if(\$?REINDEX) goto reindex
exit
#####################################################
reindex:


# re-index all data to new space group
reindex HKLIN \$outfile HKLOUT \${tempfile}reindex.mtz << EOF-reindex
SYMM \$SG
\$REINDEX
EOF-reindex
if(\$status) set BAD

# sort again
sortmtz HKLOUT \$outfile << EOF-sort
${USE_VRSET}VRSET -9E+38
H K L M/ISYM BATCH I SIGI
\${tempfile}reindex.mtz
EOF-sort
if(\$status) set BAD

if(! \$?BAD) then
    echo "\$outfile is now in \$SG"
else
    echo "reindexing failed, Dang."
    exit 9
endif

rm -f \${tempfile}reindex.mtz

exit
#####################################################
Setup:
foreach arg ( \$* )
    # new space group? 
    if("\$arg" =~ [PpCcIiFfRrHh][1-6]*) then
	set temp = \`nawk -v SG=\$arg '\$4 == toupper(SG) && \$1 < 500 {print \$4}' \$CLIBD/symop.lib | head -1\`
	if("\$temp" =~ [PpCcIiFfRrHh][1-6]*) then
	    set SG = "\$temp"
	else
	    # check for "pseudo-spacegroup" language
	    if("\$arg" =~ [Pp]2212) then
		# P2221 with screw along current "b"
		set SG = "P2221"
		set REINDEX = "reindex l, h, k"
	    endif
	    if("\$arg" =~ [Pp]2122) then
		# P2221 with screw along current "a"
		set SG = "P2221"
		set REINDEX = "reindex k, l, h"
	    endif
	    if("\$arg" =~ [Pp]21221) then
		# P21212 with non-screw along current "b"
		set SG = "P21212"
		set REINDEX = "reindex l, h, k"
	    endif
	    if("\$arg" =~ [Pp]22121) then
		# P21212 with non-screw along current "a"
		set SG = "P21212"
		set REINDEX = "reindex k, l, h"
	    endif
	    if("\$arg" =~ [Cc]2212) then
		# C2221 with screw along current "b"
		set SG = "C2221"
		set REINDEX = "reindex l, h, k"
	    endif
	    if("\$arg" =~ [Cc]2122) then
		# C2221 with screw along current "a"
		set SG = "C2221"
		set REINDEX = "reindex k, l, h"
	    endif
	endif
    endif
end
goto ReturnFromSetup

EOF-script
chmod a+x $SCRIPT












ImportRefScript:
###############################################################################

  ####   ######   #####          #####   ######  ######
 #    #  #          #            #    #  #       #
 #       #####      #            #    #  #####   #####
 #  ###  #          #            #####   #       #
 #    #  #          #            #   #   #       #
  ####   ######     #            #    #  ######  #

###############################################################################
#
#	import an arbitrary reference set
#
###############################################################################
# Start writing import_reference.com.  An existing copy is first moved to
# <script>.bak, unless FRUGAL is set.
# This part of the generated script holds its defaults and its main flow
# (entered at Return_from_Setup once the command line has been scanned):
#   1. when a free-R column is in use, dump the reference mtz to text with
#      "EXCLUDE FREER 0", keep the index/value lines, and read them back
#      with f2mtz; otherwise just copy the reference mtz
#   2. run the result through rotaprep, as batch \$ref_batch named "ref"
#   3. merge it with the raw data file using mtzutils
#   4. remove the temporary files and report success or failure
# Unescaped variables (${rawMTZ}, ${refMTZ}, $ref_batch, ${sortMTZ},
# ${tempfile}, $nawk) are expanded now; anything written as \$ is left for
# the generated script.
set SCRIPT = ${SCRIPT_dir}/import_reference.com
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#   Reference set import script by Scaler Elves
#
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set infile    = ${rawMTZ}
set reffile   = ${refMTZ}
set refdata   = ""
set ref_batch = $ref_batch
set outfile   = ${sortMTZ}
#
set tempfile  = ${tempfile}get_ref
#
############################################################################
goto Setup
#
# scan command line for user options
#
Return_from_Setup:

##########################################################################
# Remove free-R flagged HKLs from the reference MTZ
#
##########################################################################
if(\$?ref_free) then
    # need to convert to text in order to exclude freeR
    mtz2various hklin \$reffile hklout \${tempfile}.hkl << EOF-various | tee \${tempfile}log
    labin FP=\$ref_set SIGFP=\$ref_sig FREE=\$ref_free
    OUTPUT XPLOR
    EXCLUDE FREER 0
EOF-various
    if(\$status) set BAD

    # prepare to read back in
    set CELL  = \`nawk '/Cell Dimensions/{getline; getline; print}' \${tempfile}log\`
    set SG    = \`nawk '/Space group/{print \$5}' \${tempfile}log\`
    set SGnum = \`nawk '/Space group/{print \$NF+0}' \${tempfile}log\`
    set SG    = \`nawk -F "[\\047]" '/Space group/{print \$2}' \${tempfile}log\`
    set SG    = \`nawk -v num=\$SGnum '\$1==num && NF>5{print \$4}' \${CLIBD}/symop.lib\`
    rm -f \${tempfile}log >& /dev/null
    set temp = "F"
    if("\$input" == mtzi) set temp = J
   
    # simplefy file
    cat \${tempfile}.hkl |\\
    nawk '\$1 ~ /^INDE/{split(\$0,w,"="); print \$2, \$3, \$4, w[2]+0,w[3]+0}' |\\
    cat >! \${tempfile}no_freeR.hkl
    
    # now read it back in...
    f2mtz hklin \${tempfile}no_freeR.hkl hklout \${tempfile}reference.mtz << EOF-f2mtz
    CELL \$CELL
    SYMM \$SG
    SKIP
    LABOUT H K L \$ref_set \$ref_sig
    CTYPO  H H H \$temp Q
EOF-f2mtz
    if(\$status) set BAD
    
    # clean up
    rm -f \${tempfile}.hkl >& /dev/null
    rm -f \${tempfile}no_freeR.hkl >& /dev/null
else
    # symmetry with above
    cp \$reffile \${tempfile}reference.mtz
endif

##########################################################################
# Reformat reference set for reinput 
#
##########################################################################
rotaprep hklin \${tempfile}reference.mtz  hklout \${tempfile}prep.mtz << eof-reprep
input \$input
batch \$ref_batch
labin \$labin
DNAME ref
PNAME ref
eof-reprep
if(\$status) set BAD

# Put reference dataset together with raw data: \$outfile
mtzutils hklin1 \$infile hklin2 \${tempfile}prep.mtz  hklout \$outfile << eof-utils
merge
eof-utils
if(\$status) set BAD

# clean up
rm -f \${tempfile}prep.mtz
rm -f \${tempfile}reference.mtz >& /dev/null


##########################################################################
if((-e \$outfile)&&(! \$?BAD)) then
    echo "\$outfile now contains all raw data, plus "
    echo " \${ref_set}/\${ref_sig} from \${reffile}, as a reference "
    if(\$?ref_free) then
        echo "excluding HKLs where \$ref_free == 0"
    endif
else
    echo "import failed!  See above for why."
    exit 9
endif
##########################################################################
exit

Setup:
# Command-line scan of the generated script.
#   - every *.mtz argument is inspected with mtzdump: a file with the
#     columns " BATCH I " becomes the raw-data input, any other mtz becomes
#     the reference file and its "type label" pairs are saved in the
#     labels file
#   - remaining arguments that name a label of the reference file select
#     the data, sigma and free-R columns
#   - anything still unset is picked from the labels file
# The labels file is removed again on every way out of this section.
# now scan command line
foreach arg ( \$* )
    # specific input file
    if(\$arg =~ *.mtz) then
        echo "HEAD" | mtzdump hklin \$arg |\\
        nawk '/Column Labels/{getline;getline;while(NF){printf "%s ", \$0; getline};print ""}\\
              /Column Types/ {getline;getline;while(NF){printf "%s ", \$0; getline};print ""}' |\\
	cat >! \${tempfile}.mtzdump
	grep "H K L" \${tempfile}.mtzdump >& /dev/null
        if(\$status) then
	    echo "WARNING: \$arg is not an mtz file! "
	    rm -f \${tempfile}.mtzdump >& /dev/null
	    continue
        endif

	# check for multirecord mtz
	grep " BATCH I " \${tempfile}.mtzdump >& /dev/null
	if(! \$status) then
	    # this must be the multirecord, raw-data file
	    set infile = \$arg
	    rm -f \${tempfile}.mtzdump >& /dev/null
	    continue
	endif
	
	# this must be a single-record mtz (merged)
	set reffile = \$arg
	# line 1 of the dump holds the labels, line 2 the types:
	# write one "type label" pair per column
	cat \${tempfile}.mtzdump |\\
	nawk 'NR==1{split(\$0,name)} NR==2{for(i=1;i<=NF;++i){print \$i,name[i]}}' |\\
	cat >! \${tempfile}.labels
	rm -f \${tempfile}.mtzdump >& /dev/null
	continue
    endif
end

if(! -e "\$reffile") then
    echo "ERROR: reference mtz file does not exist! "
    rm -f \${tempfile}.labels >& /dev/null
    set BAD
    exit 9
endif


# see if user specified labels in the reference mtz
foreach arg ( \$refdata \$* )
    set temp = \`nawk -v arg=\$arg '\$NF==arg' \${tempfile}.labels\`
    if("\$temp" =~ J*) then
    	set input = mtzi
    	set ref_set = "\$temp[2]" 
    endif
    if("\$temp" =~ F*) then
    	set input = mtzf
    	set ref_set = "\$temp[2]" 
    endif
    if("\$temp" =~ *FreeR_flag) then
    	set ref_free = "\$temp[2]" 
    endif
    if("\$temp" =~ Q*) then
    	set ref_sig = "\$temp[2]" 
    endif
end
if(! \$?ref_set) then
    # nothing chosen by the user: take the last J or F column in the file
    set temp = \`nawk '\$1=="J" || \$1=="F"{printf "%s ", \$0;getline;print \$NF}' \${tempfile}.labels | tail -1\`
    if("\$temp" =~ J*) then
    	set input = mtzi
    	set ref_set = "\$temp[2]" 
    endif
    if("\$temp" =~ F*) then
    	set input = mtzf
    	set ref_set = "\$temp[2]" 
    endif
    if(! \$?ref_set) then
	echo "ERROR: no data in \$reffile"
	# remove the labels file (it is written with a dot before "labels")
	rm -f \${tempfile}.labels >& /dev/null
	exit 9
    endif
endif
# set up the sigma (if not aready done)
if(! \$?ref_sig) then
    set ref_sig = \`nawk -v set=\$ref_set '\$1=="Q"{Q=\$NF; if(nextq) exit} \$NF==set{nextq=1} END{print Q}' \${tempfile}.labels\`
endif
# use the FreeR flag (if it's there)
if(! \$?ref_free) then
    set temp = \`nawk 'tolower(\$NF) ~ /^freer/{print \$NF}' \${tempfile}.labels\`
    if("\$temp" != "") set ref_free = \$temp
endif
# set up rotaprep variables
if("\$input" == "mtzi") then
    set labin = "I=\$ref_set SIGI=\$ref_sig"
endif
if("\$input" == "mtzf") then
    set labin = "F=\$ref_set SIGF=\$ref_sig"
endif

echo "using \$ref_set \$ref_sig as the reference dataset"
if(\$?ref_free) then
    echo "(excluding free-R flagged hkls)"
endif

rm -f \${tempfile}.labels >& /dev/null

goto Return_from_Setup

EOF-script


chmod a+x $SCRIPT






















# landing point to regenerate scaling scripts
Scaling_Scripts:


RefScaleScript:
###############################################################################

 #####   ######  ######           ####   ######   #####
 #    #  #       #               #       #          #
 #    #  #####   #####            ####   #####      #
 #####   #       #                    #  #          #
 #   #   #       #               #    #  #          #
 #    #  ######  #                ####   ######     #

###############################################################################
#
#	scale and merge reference set
#
###############################################################################
# Prepare to write make_reference_set.com.  An existing copy is first moved
# to <script>.bak, unless FRUGAL is set.
set SCRIPT = ${SCRIPT_dir}/make_reference_set.com
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

# set up the scaling card
# scale part:    "batch", or "rotation spacing N" when SCALING is "smooth"
# B-factor part: "brotation spacing N" when BFACTOR is "smooth",
#                otherwise "bfactor on"
# (N is $SPACING)
set scale_card = "batch"
if("$SCALING" == "smooth") set scale_card = "rotation spacing $SPACING"
if("$BFACTOR" == "smooth") then
    set scale_card = "$scale_card brotation spacing $SPACING"
else
    set scale_card = "$scale_card bfactor on"
endif
# the merged reference normally goes to ${refMTZ} ...
set outputfile = ${refMTZ}
# don't overwrite a user-specified mtz file!
if($?USER_REFERENCE) set outputfile = mtz/reference.mtz

# First piece of make_reference_set.com: defaults, command-line scan (an
# existing *.mtz argument replaces the input file) and the start of the
# scala job, up to the resolution card.  The scala input is left open here
# and is completed by the pieces appended after this one.
#
# The generated script runs under "csh -f" rather than "csh -fe".  With -e
# the shell exits at the first failing command, so neither the fallback to
# plain awk nor the status check, clean-up and "scaling failed!" message
# that follow the scala job could ever run.
cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#   Reference set ($wave_reference) scaling, sorting, remergeing script by Scaler Elves
#
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set infile   = ${rawMTZ}
set outfile  = ${outputfile}
#
set tempfile = ${tempfile}make_ref
#
############################################################################
# now scan command line
foreach arg ( \$* )
    # specific input file
    if((\$arg =~ *.mtz)&&(-e \$arg)) then
	if(\$arg =~ *.mtz) set infile = "\$arg"
    endif
end
#
############################################################################
# first, pre-scale reference wavelength data
#
##########################################################################
scala hklin \$infile  hklout \$outfile \
  scales   \${tempfile}scales \
  rogues   \${tempfile}rogues \
  normplot \${tempfile}norm   \
  anomplot \${tempfile}anom << eof_scaleref
# save disk space
#NODUMP
#ANALYSE NOPLOT
# make sure scala doesnt do anyhting stupid like change the output filename
DNAME ref
PNAME ref

# resolution range
RESOLUTION $hiRES $loRES

EOF-script

# add runs for the reference set
cat $RUNFILE |\
 nawk -v wave=$wave_reference '$2=="wavelength"{p=0} \
         $0 ~ " " wave " " || wave=="all"{p=1} p{print}' |\
cat >> $SCRIPT

# plus the "universal" rules
echo "# global SCALA cards" >> $SCRIPT
cat $RULESFILE >> $SCRIPT

cat << EOF-script >> $SCRIPT

# one scale and one B-factor per frame
#scales batch bfactor on
# Elves choice
scales $scale_card
# 30 scaling iterations, or convergence, whichever comes first
cycles 30 converge 0.1
# eigenvalue filter, if convergence is oscillating
#filter 1.0e-6 0.01

# no need for anomalous diffs in reference set
anomalous off

eof_scaleref
if(\$status) set BAD

# clean up
rm -f \${tempfile}scales
rm -f \${tempfile}norm
rm -f \${tempfile}anom
rm -f \${tempfile}rogues

rm -f \${tempfile}prep.mtz
rm -f \${tempfile}ref.mtz


##########################################################################
if((-e \$outfile)&&(! \$?BAD)) then
    echo "\$outfile now contains a merged data set of ${wave_reference} "
else
    echo "scaling failed!  See above for why."
    exit 9
endif
##########################################################################

EOF-script


chmod a+x $SCRIPT
























RoughScale:
###############################################################################

 #####    ####   #    #   ####   #    #           ####    ####     ##    #       ######
 #    #  #    #  #    #  #    #  #    #          #       #    #   #  #   #       #
 #    #  #    #  #    #  #       ######           ####   #       #    #  #       #####
 #####   #    #  #    #  #  ###  #    #               #  #       ######  #       #
 #   #   #    #  #    #  #    #  #    #          #    #  #    #  #    #  #       #
 #    #   ####    ####    ####   #    #           ####    ####   #    #  ######  ######

###############################################################################
#
#	unified script scaling of ALL data (in both rough and smooth mode)
#
###############################################################################
# Generate rough_scale.com: a stand-alone csh script that scales ALL the data
# in $sortMTZ together with SCALA and writes the unmerged result to $rscaleMTZ.
# The generated script accepts optional arguments (an mtz file, a cycle count,
# "spacing <degrees>" and "filter"), which are parsed in its Setup section.
set SCRIPT = ${SCRIPT_dir}/rough_scale.com
# keep the previous copy of the script, unless FRUGAL is set
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

# set up the scaling card
set scale_card = "batch"
if("$SCALING" == "smooth") set scale_card = "rotation spacing $SPACING"
if("$BFACTOR" == "smooth") then
    set scale_card = "$scale_card brotation spacing $SPACING"
else
    set scale_card = "$scale_card bfactor on"
endif
# the reference "run 999" card is commented out when there is no reference
set useref = ""
if($?NO_REFERENCE) set useref = "#"

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#  Automatically generated multiwavelength SCALA script 
#  for rough (frame-wise) scaling of $wavenames together
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set infile   = ${sortMTZ}
set outfile  = ${rscaleMTZ}
set runfile  = $RUNFILE
#
set tempfile = ${tempfile}rough
#
# Defaults
set SPACING = $SPACING
set CYCLES  = $CYCLES
set FILTER  = "#"
#
# read command-line arguments
goto Setup
############################
#
# usage: $SCRIPT [cycles] spacing [spacing] filter
#    eg: $SCRIPT 50 spacing $SPACING filter
#         means 50 cycles, ${SPACING}-degree smoothing window and use the filter
#
############################
ReturnFromSetup:
######################################################################
#
# Now run SCALA
#
######################################################################
rm -f \${tempfile}temp.mtz >& /dev/null
scala hklin \$infile  hklout \${tempfile}temp.mtz \
  scales   \${tempfile}scales \
  rogues   badspots.txt       \
  normplot \${tempfile}norm   \
  anomplot \${tempfile}anom  << EOF-scale
# save disk space
#NODUMP
#ANALYSE NOPLOT

title "$TITLE"

# resolution range
RESOLUTION $hiRES $loRES

# reference data set ($wave_reference) is NOT merged
${useref}run 999 batch $ref_batch reference

@\$runfile

# global SCALA cards
`cat $RULESFILE`

#### treatment of partials ###### default, scale fulls only
#intensities partials		# use summed partials as fulls in scaling
#intensities scale_partials 0.8	# use extrapolated partials (by their fraction recorded) as fulls in scaling

#### type of scaling to be done ##########
#intensities anomalous                                    # scale I+ and I- as separate data sets
#scales batch bfactor off                                # one scale per frame
#scales batch bfactor on                                 # one scale and one B-factor per frame
#scales batch brotation spacing 10 bfactor on            # same, but B-factor smoothed over 10-degree window
#scales batch brotation spacing 10 bfactor anisotropic   # same, but anisotropic B-factor
#scales rotation spacing 10 bfactor on                   # scales smoothed over 10-degree window, B-factors follow SAME rule
#scales rotation spacing 10 detector 3 3                 # no B-factors, 9, smooth detector scales instead
#scales rotation spacing 10 secondary                    # spherical harmonics instead of B-factors
scales $scale_card	 # Scaler Elves's opinion

# perhaps tie these down to prevent wild swings in scaling? 
#tie rotation 0.2
#tie detector 0.2

cycles \$CYCLES converge 0.1	# need not converge completely
\${FILTER}filter 1.0e-4 0.01	# if convergence is oscillating

#reject byrun			# don't reject spots on comparison between wavelengths

# print out statistics for I+/I- instead of Imean
anomalous

# do not merge (yet)
output separate reference

EOF-scale
######################################################################
if(\$status) then
    echo "SCALA has crashed."
    
    # check to see if, perhaps, the mtz was produced anyway
    echo "go" | mtzdump hklin \${tempfile}temp.mtz >& /dev/null
    if(\$status) then
	# file was no good
	rm -f \${tempfile}temp.mtz >& /dev/null
    endif
    if(-e \${tempfile}temp.mtz) then
	echo "but seems to have produced the output file with its last breath..."
	echo "YOU SHOULD MAKE SURE IT'S STILL OKAY! "
    endif
endif
if(! -e \${tempfile}temp.mtz) then
    echo "This usually happens when you have bad frames, or too many degrees "
    echo "of freedom in your run."
    echo "Try eliminating bad frames, (look at them! ), or increasing the "
    echo "spacing of B factor smoothing, or turning off Bs and, especially, "
    echo "getting rid of things like anisotropic Bs"
    echo "Good luck! "
    echo ""
    # premature exit, allow user to see intermediate files
    exit 9
else
    # move file upon successful completion
    sortmtz hklout \$outfile << EOF
${USE_VRSET}VRSET -9E+38
H K L M/ISYM BATCH I SIGI
\${tempfile}temp.mtz
EOF
    rm -f \${tempfile}temp.mtz  >& /dev/null
endif

# clean up after scala
rm -f \${tempfile}scales        >& /dev/null
rm -f \${tempfile}norm	        >& /dev/null
rm -f \${tempfile}anom	        >& /dev/null

exit

##############################################################################
Setup:
# process command-line arguments
# each argument is classified exactly once: input file, spacing keyword,
# filter keyword, or cycle count
set i = 1
while ( \$i <= \$#argv )
    if(("\$argv[\$i]" =~ *.mtz)&&(-e "\$argv[\$i]")) then
	# define the input file
	set infile = "\$argv[\$i]"
    else if("\$argv[\$i]" =~ spac*) then
	# may override default spacing: the value is the next argument
	# (look ahead with a bounds check, a bad subscript aborts the script)
	@ next = ( \$i + 1 )
	if( \$next <= \$#argv ) then
	    if("\$argv[\$next]" =~ [1-9]*) then
		set SPACING = "\$argv[\$next]"
		# consume the value so it is not also taken as a cycle count
		@ i = \$next
	    endif
	endif
    else if("\$argv[\$i]" == "filter") then
	# may override default filtering
	set FILTER = ""
    else if(("\$argv[\$i]" =~ [1-9]*)&&("\$argv[\$i]" =~ *[0-9])) then
	# raw numbers become cycle counts
	set CYCLES = "\$argv[\$i]"
    endif
    
    @ i = ( \$i + 1)
end
#
# Run List passed down from Scaler Elves
if(! -e "\$runfile" ) then
    # generate file containing list of scala runs
    cat << EOF-runs >! \$runfile
EOF-script
# splice the current run list in as the body of the EOF-runs here-document
cat $RUNFILE >> $SCRIPT
cat << EOF-script >> $SCRIPT
EOF-runs
endif

goto ReturnFromSetup

EOF-script
chmod a+x $SCRIPT



























LocalScale:
###############################################################################

 #        ####    ####     ##    #        ####    ####     ##    #       ######
 #       #    #  #    #   #  #   #       #       #    #   #  #   #       #
 #       #    #  #       #    #  #        ####   #       #    #  #       #####
 #       #    #  #       ######  #            #  #       ######  #       #
 #       #    #  #    #  #    #  #       #    #  #    #  #    #  #       #
 ######   ####    ####   #    #  ######   ####    ####   #    #  ######  ######

###############################################################################
#
#	script for local-scaling of MAD data
#
###############################################################################
# Generate localscale.com: a stand-alone csh script that re-sorts the
# rough-scaled data ($rscaleMTZ), runs SCALA with smoothed scales plus the
# "secondary" correction, and writes the unmerged result to $lscaleMTZ.
# The generated script accepts optional arguments (an mtz file, a cycle count,
# "spacing <degrees>" and "filter"), which are parsed in its Setup section.
set SCRIPT = ${SCRIPT_dir}/localscale.com
# keep the previous copy of the script, unless FRUGAL is set
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

# the scaling card doesn't change here

# the reference "run 999" card is commented out when there is no reference
set useref = ""
if($?NO_REFERENCE) set useref = "#"

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#  Automatically generated multiwavelength SCALA script 
#  for local-scaling of $wavenames together
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set infile   = ${rscaleMTZ}
set outfile  = ${lscaleMTZ}
set runfile  = $RUNFILE
#
set tempfile = ${tempfile}lscale
#
# Defaults
set SPACING = $SPACING
set CYCLES  = $CYCLES
set FILTER  = "#"
#
# read command-line arguments
goto Setup
############################
#
# usage: $SCRIPT [cycles] spacing [spacing] filter
#    eg: $SCRIPT 50 spacing $SPACING filter
#         means 50 cycles, ${SPACING}-degree smoothing window and use the filter
#
############################
ReturnFromSetup:
######################################################################
#
# make SURE the mtz is sorted
#
######################################################################
rm -f \${tempfile}temp.mtz >& /dev/null
sortmtz HKLOUT \${tempfile}in.mtz << EOF
${USE_VRSET}VRSET -9E+38
H K L M/ISYM BATCH I SIGI
\$infile
EOF

######################################################################
#
# Now run SCALA
#
######################################################################
rm -f \${tempfile}temp.mtz >& /dev/null
scala hklin \${tempfile}in.mtz  hklout \${tempfile}temp.mtz \
  scales   \${tempfile}scales \
  rogues   badspots.txt       \
  normplot \${tempfile}norm   \
  anomplot \${tempfile}anom  << EOF-scale
# save disk space
#NODUMP
#ANALYSE NOPLOT

title "$TITLE"

# resolution range
RESOLUTION $hiRES $loRES

# reference data set ($wave_reference) is NOT merged
${useref}run 999 batch $ref_batch reference

@\$runfile

# global SCALA cards
`cat $RULESFILE`

#### treatment of partials ###### default, scale fulls only
#intensities partials		# use summed partials as fulls in scaling
#intensities scale_partials 0.8	# use extrapolated partials (by their fraction recorded) as fulls in scaling

#### type of scaling to be done ##########
#intensities anomalous                                    # scale I+ and I- as separate data sets
#scales batch bfactor off                                # one scale per frame
#scales batch bfactor on                                 # one scale and one B-factor per frame
#scales batch brotation spacing 10 bfactor on            # same, but B-factor smoothed over 10-degree window
#scales batch brotation spacing 10 bfactor anisotropic   # same, but anisotropic B-factor
#scales rotation spacing 10 bfactor on                   # scales smoothed over 10-degree window, B-factors follow SAME rule
#scales rotation spacing 10 detector 3 3                 # no B-factors, 9, smooth detector scales instead
#scales rotation spacing 10 secondary                    # spherical harmonics instead of detector scales and B-factors
scales rotation spacing \$SPACING secondary               # Scaler Elves's opinion

# perhaps tie these down to prevent wild swings in scaling? 
#tie rotation 0.1
#tie detector 0.1
tie surface 0.001

cycles \$CYCLES converge 0.01	# need to converge completely
\${FILTER}filter 1.0e-4 0.01	# if convergence is oscillating

#reject byrun			# don't reject spots on comparison between runs

# print out statistics for I+/I- instead of Imean
anomalous

# do not merge (yet), but get rid of reference dataset
output separate


EOF-scale
######################################################################
if(\$status) then
    echo "SCALA has crashed."
    
    # check to see if, perhaps, the mtz was produced anyway
    echo "go" | mtzdump hklin \${tempfile}temp.mtz >& /dev/null
    if(\$status) then
	# file was no good
	rm -f \${tempfile}temp.mtz >& /dev/null
    endif
    if(-e \${tempfile}temp.mtz) then
	echo "but seems to have produced the output file with its last breath..."
	echo "YOU SHOULD MAKE SURE IT'S STILL OKAY! "
    endif
endif
if(! -e \${tempfile}temp.mtz) then
    echo "This usually happens when you have too many degrees of freedom in your run."
    echo "Try increasing the spacing of the smooth scales."
    echo "or decreasing the values in the tie cards."
    echo "Good luck! "
    echo ""
    # premature exit, allow user to see intermediate files
    exit 9
else
    # move file upon successful completion
    sortmtz hklout \$outfile << EOF
${USE_VRSET}VRSET -9E+38
H K L M/ISYM BATCH I SIGI
\${tempfile}temp.mtz
EOF
    rm -f \${tempfile}temp.mtz  >& /dev/null
endif

# clean up after scala
rm -f \${tempfile}in.mtz        >& /dev/null
rm -f \${tempfile}scales        >& /dev/null
rm -f \${tempfile}norm	        >& /dev/null
rm -f \${tempfile}anom	        >& /dev/null

exit

##############################################################################
Setup:
# process command-line arguments
# each argument is classified exactly once: input file, spacing keyword,
# filter keyword, or cycle count
set i = 1
while ( \$i <= \$#argv )
    if(("\$argv[\$i]" =~ *.mtz)&&(-e "\$argv[\$i]")) then
	# define the input file
	set infile = "\$argv[\$i]"
    else if("\$argv[\$i]" =~ spac*) then
	# may override default spacing: the value is the next argument
	# (look ahead with a bounds check, a bad subscript aborts the script)
	@ next = ( \$i + 1 )
	if( \$next <= \$#argv ) then
	    if("\$argv[\$next]" =~ [1-9]*) then
		set SPACING = "\$argv[\$next]"
		# consume the value so it is not also taken as a cycle count
		@ i = \$next
	    endif
	endif
    else if("\$argv[\$i]" == "filter") then
	# may override default filtering
	set FILTER = ""
    else if(("\$argv[\$i]" =~ [1-9]*)&&("\$argv[\$i]" =~ *[0-9])) then
	# raw numbers become cycle counts
	set CYCLES = "\$argv[\$i]"
    endif
    
    @ i = ( \$i + 1)
end
#
# Run List passed down from Scaler Elves
if(! -e "\$runfile" ) then
    # generate file containing list of scala runs
    cat << EOF-runs >! \$runfile
EOF-script
# splice the current run list in as the body of the EOF-runs here-document
cat $RUNFILE >> $SCRIPT
cat << EOF-script >> $SCRIPT
EOF-runs
endif

goto ReturnFromSetup

EOF-script
chmod a+x $SCRIPT

















###############################################################################

 #####    ####    ####    #####  #####   ######  ######
 #    #  #    #  #          #    #    #  #       #
 #    #  #    #   ####      #    #    #  #####   #####
 #####   #    #       #     #    #####   #       #
 #       #    #  #    #     #    #   #   #       #
 #        ####    ####      #    #    #  ######  #

###############################################################################
#
#	script for postrefinement (unsupported)
#
###############################################################################
# Generate postref.com: a prototype csh script that reformats unmerged SCALA
# output for POSTREF, postrefines every batch, converts the result back into
# a file SCALA can merge, and prints the POSTREF summary as xloggraph tables.
# Escaping levels used below:
#   \$    expanded when postref.com runs
#   \\\$  survives to the awk script that postref.com writes out
set SCRIPT = ${SCRIPT_dir}/postref.com
# keep the previous copy of the script, unless FRUGAL is set
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#   Prototype postref script
#
#
#	Takes standard unmerged SCALA output (using OUTPUT SEPARATE), 
#	reformats it for POSTREF, and postrefines each frame against
#	the overall, merged data set.
#	the results are output as a file suitable for reinput into
#	scala for mergeing (NOT scaling).
#
#	Caveats:
#	- The SDCORR option given below should match that used in the
#	  SCALA run.
#
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
############################################################################
#
set infile   = ./mtz/localscaled.mtz
set outfile  = ./mtz/postrefined.mtz
#
set tempfile = ${tempfile}postref_temp.
#
############################################################################
# now scan command line
foreach arg ( \$* )
    # specific input file
    if(-e "\$arg") then
	if(\$arg =~ *.mtz) then
	    set infile = "\$arg"
	endif
    endif
end
#
set MOSAICITY


Reformat:
############################################################################
# use scala to reformat data for postref
#
############################################################################
scala hklin \$infile  hklout \${tempfile}reformatted.mtz \\
  scales   \${tempfile}scales \\
  rogues   badspots.txt       \\
  normplot \${tempfile}norm   \\
  anomplot \${tempfile}anom  << EOF-reformat
# global SCALA cards
`cat $RULESFILE`

anomalous
initial none
noscale
cycles 0
output postref
#final none
EOF-reformat
rm -f \${tempfile}scales >& /dev/null
rm -f \${tempfile}anom >& /dev/null
rm -f \${tempfile}norm >& /dev/null

PreSort:
############################################################################
# re-sort scaled data for postref
#
############################################################################
sortmtz HKLOUT \${tempfile}sorted.mtz << EOF-sort
${USE_VRSET}VRSET -9E+38
BATCH H K L M/ISYM
\${tempfile}reformatted.mtz 
EOF-sort
rm -f \${tempfile}reformatted.mtz >& /dev/null

Rename:
# rename data to protect them from postref
# and use the sigma-corrected intensities
mtzutils hklin \${tempfile}sorted.mtz hklout \${tempfile}postrefme.mtz << EOF-utils
# postref changes ROT to range -180 to 180, which SCALA will not accept
#COLUMN_LABELS oldROT=ROT
# use SCALA's corrected sigmas
COLUMN_LABELS oldSIGI=SIGI oldSIGIPR=SIGIPR
COLUMN_LABELS SIGI=SIGIC   SIGIPR=SIGIPRC
EOF-utils
# bypass this? use SDFAC below? 
#mv \${tempfile}sorted.mtz \${tempfile}postrefme.mtz 
rm -f \${tempfile}sorted.mtz >& /dev/null


############################################################################
# examine file for lack of mosaicity (denzo data)
#
# One awk pass does both jobs: it flags a zero mosaicity and averages the
# mosaicity values together with the phi ranges seen after the first zero.
# The flag has to live in the same awk process that prints the card.
# NOTE(review): confirm that MOSAICITY is the card name postref expects.
############################################################################
echo "BATCH" | mtzdump HKLIN \${tempfile}postrefme.mtz |\\
nawk '\$1 == "Mosaicity" {sum += \$NF; ++n; if(\$NF+0 == 0) badmos=1} \\
 badmos && /Phi angles/ {sum += \$NF - \$(NF-1); ++n} \\
 END{if(n && badmos) print "MOSAICITY", sum/n}' |\\
cat >! \${tempfile}mos
# set mosaicity to osc/2 if no mosaicity given
set MOSAICITY = \`cat \${tempfile}mos\`
rm -f \${tempfile}mos

Postref:
############################################################################
# now do postrefining
#
############################################################################
postref SUMMARY \${tempfile}summary \\
          HKLIN \${tempfile}postrefme.mtz \\
          HKLOUT \${tempfile}postrefed.mtz  << eof_postref

TITLE postreffing \${infile}

# no need for SDFAC if we use SIGIC
#`nawk '/SDCORR/{print \$1,\$2,\$NF}' $RULESFILE`
#BEAM 0 0  0.0001 0
\$MOSAICITY

# make sure this converges
REFINE NCYC 100 CONVRG 0.02
# default values for "REJECT" card:
REJECT 1000  -3 -3  0.5 3 0.0 0.0 10 

# refine all batches, individually, against the merged data
BATCH ALL
#CRYSTAL 1 to 77

OUTPUT ALL

END
eof_postref
rm -f \${tempfile}postrefme.mtz

# reformat output MTZ for input into SCALA again
mtzutils hklin \${tempfile}postrefed.mtz hklout \${tempfile}sortme.mtz << EOF-utils
# strip off incomplete line (will crash next postref run)
#EXCLUDE SIGSCALE SIGFRACTIONCALC IMEAN SIGIMEAN ISUM SIGISUM
# scala will crash if ROT is changed
#COLUMN_LABELS ROT=oldROT
# rename sigmas back to original values
COLUMN_LABELS SIGIC=SIGI   SIGIPRC=SIGIPR
COLUMN_LABELS SIGI=oldSIGI SIGIPR=oldSIGIPR
EOF-utils
#mv \${tempfile}postrefed.mtz \${tempfile}sortme.mtz >& /dev/null
rm -f \${tempfile}postrefed.mtz >& /dev/null

# re-sort for scala
sortmtz HKLOUT \${outfile} << EOF-sort
${USE_VRSET}VRSET -9e+38
H K L M/ISYM BATCH I SIGI
\${tempfile}sortme.mtz
EOF-sort
# clean up
rm -f \${tempfile}sortme.mtz




# do xloggraph version of summary
cat << EOF-export >! \${tempfile}awkscript
#! /bin/nawk -f
#
#	Sumarize postref summary file in xloggraph format
#
#

# only read number-only lines
! /[a-z]/ && NF>1 {
    ++n; 
    
    # remember actual batch number
    batch[n] = \\\$1;
    
    # get cell parameters
    a[n]=\\\$2; a["avg"]+=a[n];
    b[n]=\\\$3; b["avg"]+=b[n]; 
    c[n]=\\\$4; c["avg"]+=c[n];
    A[n]=\\\$5; A["avg"]+=A[n];
    B[n]=\\\$6; B["avg"]+=B[n];
    G[n]=\\\$7; G["avg"]+=G[n];

    # get crystal orientation shifts
    phiX[n]= \\\$9; phiX["avg"] += phiX[n];
    phiY[n]=\\\$10; phiY["avg"] += phiY[n];
    phiZ[n]=\\\$11; phiZ["avg"] += phiZ[n];

    DphiX[n] = \\\$9-\\\$12;
    DphiY[n] = \\\$10-\\\$13;
    DphiZ[n] = \\\$11-\\\$14;
    
    # get mosaicity
    mos[n] = \\\$15; mos["avg"]+=mos[n];
    if(mos["max"] < mos[n]) mos["max"] = mos[n]
}

END{
    # compute average values
    if(n)
    {
	a["avg"]=a["avg"]/n;
	b["avg"]=b["avg"]/n; 
	c["avg"]=c["avg"]/n;
	A["avg"]=A["avg"]/n;
	B["avg"]=B["avg"]/n;
	G["avg"]=G["avg"]/n;
    
	mos["avg"]=mos["avg"]/n;
    
	# print xloggraph plot header
	print " \\\$TABLE : - Elven Postref Plots:"
	print " \\\$GRAPHS:Unit Cell (deviations from mean):A:1, 3, 4, 5, 6, 7, 8:"
	print "        :Crystal Orientation (change during refinement):A:1, 9, 10, 11:"
	print "        :Crystal Slippage (orientation change during exposure):A:1, 12, 13, 14:"
	printf "        :Crystal Mosaicity (mean=%.3f max=%.3f):A:1, 15:\\\\n", mos["avg"], mos["max"]
	print " \\\$\\\$"
	print " frame batch A B C alpha beta gamma phiX phiY phiZ delta_phiX delta_phiY delta_phiZ mosaicity \\\$\\\$"
	print " \\\$\\\$"
    
	# compute deviates of the cell
	for(i=1;i<=n;++i)
	{
	    # compute deviates of the cell
	    a[i]-=a["avg"];
	    b[i]-=b["avg"]; 
	    c[i]-=c["avg"];
	    A[i]-=A["avg"];
	    B[i]-=B["avg"];
	    G[i]-=G["avg"];
	    
	    # print out interesting numbers as graphs
	    printf("%4d %6d %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f   %7.2f %7.2f %7.2f   %7.2f %7.2f %7.2f   %7.3f\\\\n", \\\\
	    i, batch[i], a[i], b[i], c[i], A[i], B[i], G[i], \\\\
	    phiX[i], phiY[i], phiZ[i], DphiX[i], DphiY[i], DphiZ[i], mos[i]);
	}

	print "\\\$\\\$"
	
	printf "Average cell is  : %.2f %.2f %.2f %.2f %.2f %.2f\\\\n", a["avg"],  b["avg"],  c["avg"],  A["avg"],  B["avg"],  G["avg"];
	printf "Average mosaicity: %.3f\\\\n", mos["avg"];
	printf "Maximum mosaicity: %.3f\\\\n", mos["max"];
    }    
}
EOF-export

# dump results to stdout
nawk -f \${tempfile}awkscript  \${tempfile}summary

# finish cleaning up
rm -f \${tempfile}awkscript 
rm -f \${tempfile}summary

##########################################################################
if(-e \$outfile) then
    echo "\$outfile now contains a postrefined version of \$infile "
endif
##########################################################################

exit

EOF-script
chmod a+x $SCRIPT


















Merger:
############################################################################

 #    #  ######  #####    ####   ######  #####
 ##  ##  #       #    #  #    #  #       #    #
 # ## #  #####   #    #  #       #####   #    #
 #    #  #       #####   #  ###  #       #####
 #    #  #       #   #   #    #  #       #   #
 #    #  ######  #    #   ####   ######  #    #

############################################################################
#
# Generate script for mergeing each wavelength from final scaling file
#
############################################################################
set SCRIPT = ${SCRIPT_dir}/merge.com
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#  Automatically generated script 
#  for mergeing one or all of $wavenames
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set infile   = ${lscaleMTZ}
set outfile  = merged.mtz
set runfile  = $RUNFILE
#
set tempfile = ${tempfile}merge
#
# Default to mergeing ALL data together (useful? )
if(! \$?wave) set wave     = "all"
set hires    = $hiRES
#
# read command-line arguments
goto Setup
############################
#
# usage: $SCRIPT [wavename] [high_RES] [SG]
#    eg: $SCRIPT $wave_reference $hiRES $SG
#        means merge frames from $wave_reference to $hiRES A, in $SG
#
############################
ReturnFromSetup:
######################################################################
#
# Now run SCALA (to merge, not scale data)
#
######################################################################
rm -f \${tempfile}merged.mtz >& /dev/null
scala hklin \$infile  hklout \${tempfile}merged.mtz \
  scales   \${tempfile}scales \
  rogues   badspots.\$wave.txt     \
  normplot \${tempfile}norm \
  anomplot \${tempfile}anom  << EOF-scale
# save disk space
#NODUMP
#ANALYSE NOPLOT
# make sure scala doesnt do anyhting stupid like change the output filename
DNAME \$wave
PNAME \$wave

title "merged \$wave from $TITLE"

# resolution range
RESOLUTION \$hires $loRES

@\${tempfile}runlist

# global SCALA cards
`cat $RULESFILE`

# treatment of partials	    # default, sum partials, and use result in analysis
#final fulls		    # use fulls only in analysis
#final scale_partials 0.5   # treat partials with fraction recorded > 0.5 as fulls in mergeing

onlymerge
noscale			    # should apply old scales!
#scales batch bfactor off

# print out statistics for I+/I- instead of Imean
# changing this to "off" will NOT affect output Imean
anomalous on

EOF-scale
##############################################################################
if(\$status) then
    echo "SCALA crashed! "
    # check to see if file was produced anyway
    echo "go" | mtzdump hklin \${tempfile}merged.mtz >& /dev/null
    if(\$status) then
	# file was no good
	rm -f \${tempfile}merged.mtz >& /dev/null
	set BAD
    else
	echo "...but seems to have produced the output data file before it did."
	echo "YOU SHOULD CHECK AND SEE IF THIS IS OKAY!!! "
    endif
endif

# clean up after scala
rm -f \${tempfile}runlist       >& /dev/null
rm -f \${tempfile}scales        >& /dev/null
rm -f \${tempfile}norm	        >& /dev/null
rm -f \${tempfile}anom	        >& /dev/null

truncate:
##############################################################################
# run truncate to make "F"s from "I"s
#
##############################################################################
truncate hklin \${tempfile}merged.mtz \
        hklout \${outfile} << EOF-trunc | tee \${tempfile}temp.log

title "merged \$wave from $TITLE"

# overall resolution range 
RESOLUTION \$hires $loRES
# resolution used to determine B factor
#rscale 4 2.8
#ranges 0.01

# needed to determine absolute scale
nresidue $NRES

# roll-off negative intensities with French-Wilson procedure
truncate yes
# changing this to "no" will NOT affect output Fs
anomalous yes
LABOUT  F=F  SIGF=SIGF DANO=DANO SIGDANO=SIGDANO ISYM=ISYM
EOF-trunc
##############################################################################
if(\$status) then
    # see if this is a fixable problem
    grep "Data beyond useful resolution limit" \${tempfile}temp.log >& /dev/null
    if(\$status) then
	# nope
	set BAD
    else
	# we know how to fix this (and there is no reason not to)
	cat \${tempfile}temp.log | nawk '/Wilson Plot/,/TRUNCATE/' |\\
        nawk 'NF==10 && ! /[a-z]/{print \$6}' |\\
        tail -1 >! \${tempfile}res
        set hires = \`cat \${tempfile}res\`
        rm -f \${tempfile}res
	
        if("\$hires" == "") then
            set hires = \`awk '/Negative or zero mean I/ && \$NF+0>0.1{print \$NF+0}' \${tempfile}temp.log | tail -1\`
        endif

	# go back, and try this again
	if("\$hires" != "") then
	    set newRES = "\$hires"
	    # prevent infinite loops if truncate is messed up
	    if(! \$?retries) set retries = 0
	    @ retries = ( \$retries + 1 )
	    if(\$retries <= 5) then
	        goto truncate
	    endif
	    set BAD
	endif
	set BAD
    endif
endif

# delete intermediate files
rm -f \${tempfile}merged.mtz	>& /dev/null
rm -f \${tempfile}temp.log	>& /dev/null
rm -f \${tempfile}reindexed.mtz >& /dev/null

##############################################################################
if(! \$?BAD) then
    if(\$?newRES) echo "WARNING: output resolution reduced to \$hires A"
    echo "merged \$wave data is in \${outfile}"
else
    echo "mergeing failed!  See above for why."
    exit 9
endif

exit

##############################################################################
Setup:
#
# Run List passed down from Scaler Elves
if(! -e "\$runfile" ) then
    # generate file containing list of scala runs
    cat << EOF-runs >! \$runfile
EOF-script
cat $RUNFILE >> $SCRIPT
cat << EOF-script >> $SCRIPT
EOF-runs
endif
#
# process command-line arguments
set newSG = ""
set axes  = ""
foreach arg ( \$* )
    # define the input file
    if(("\$arg" =~ *.mtz)&&(-e "\$arg")) then
	set infile = "\$arg"
    else
    
	# look for wavelength name
	set temp = \`grep "wavelength" \$runfile | grep " \$arg " \`
	if("\$temp" != "") then
	    # must be the wavelength user wants
	    set wave = "\$arg"
	else
	
	    # look for new resolution limit
	    set temp = \`echo "\$arg" | nawk '\$1+0<10 && \$1+0>0.4 {print \$1+0}'\`
	    if("\$temp" != "") then
		# must be the resolution user wants
		set hires = "\$temp"
	    endif
	
	endif
    endif
    
    # check for new space group
    if("\$arg" =~ [PpCcIiFfRrHh][1-6]*) then
	# check for SGs listed in library (but not the screwy ones)
	set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
	if(\$?CLIBD) then
	    set temp = \`nawk -v SG=\$temp '\$4 == SG && \$1 < 500 {print \$4}' \$CLIBD/symop.lib | head -1\`
	endif
	if("\$temp" != "") then
	    set newSG = "\$temp"
	endif
	
	set temp = \`echo \$arg | nawk '{print toupper(\$1)}'\`
	# check for orthorhombic "pseudo-spacegroup" language
	if("\$temp" == P2221) then
	    # P2221 with screw along longest axis
	    set axes  = "a b c"
	    set newSG = "P2221"
	    continue
	endif
	if("\$temp" == P2212) then
	    # P2221 with screw along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P2221"
	    continue
	endif
	if("\$temp" == P2122) then
	    # P2221 with screw along shortest axis
	    set axes  = "c a b"
	    set newSG = "P2221"
	    continue
	endif
	
	if("\$temp" == P21212) then
	    # P21212 with non-screw along longest axis
	    set axes  = "a b c"
	    set newSG = "P21212"
	    continue
	endif
	if("\$temp" == P21221) then
	    # P21212 with non-screw along mid-length axis
	    set axes  = "b c a"
	    set newSG = "P21212"
	    continue
	endif
	if("\$temp" == P22121) then
	    # P21212 with non-screw along shotest axis
	    set axes  = "c a b"
	    set newSG = "P21212"
	    continue
	endif
	    
	if("\$temp" == C2221) then
	    # C2221 with screw along longest axis
	    set axes  = "a b c"
	    set newSG = "C2221"
	    continue
	endif
	# are these legal? 
	if("\$temp" == C2212) then
	    # C2221 with screw along mid-length axis
	    set axes  = "b c a"
	    set newSG = "C2221"
	    continue
	endif
	if("\$temp" == C2122) then
	    # C2221 with screw along shortest axis
	    set axes  = "c a b"
	    set newSG = "C2221"
	    continue
	endif
    endif
	
    # check for flipping of ambiguous axes
    if("\$arg" == "flip") then
	# user requested "flip" of axes
	set FLIP
	# this should work for all P4x, P3x, P6x
	set REINDEX = "reindex k, h, -l"
	set message = "with a and b axes flipped"
	
	# are there any others?
    endif
    
    # what about cubic? 
end
#
# decide on new axis ordering (for asymmetric orthorhombics)
#if("\$newSG" == "P222")    set axes = "a b c"
#if("\$newSG" == "P212121") set axes = "a b c"
if("\$axes" != "") then
    # get current axis ordering
    set CELL = \`echo "head" | mtzdump hklin \$infile | nawk '/Cell Dimensions/{getline;getline;print}'\`
    # find out what the cannonical one would be
    # then decide how to go from current ordering to the desired one
    echo "\$CELL" | nawk '{\\
	# print out current axis order \\
	print \$1, "h"; print \$2, "k"; print \$3, "l"}' |\\
    sort -n |\\
    nawk '\\
	# add cannonical axis names\\
	NR==1{print \$0, "a"} NR==2{print \$0, "b"} NR==3{print \$0, "c"}' |\\
    nawk -v axes="\$axes" 'BEGIN{split(axes, abc)} {\\
	# write desired axis ordering in front of cannonical one \\
	print abc[NR], \$0}' |\\
    sort |\\
    cat >! \${tempfile}order    
    set REINDEX = "reindex "\`nawk '{printf "%s", \$3} NR~/[12]/{print ","}'  \${tempfile}order\`

    # this should give us a mapping between any two orthorhombics
    
    set temp = \`tail -1 \${tempfile}order | nawk '{print \$NF}'\`
    rm -f \${tempfile}order
    if(("\$newSG" == "P2221")||("\$newSG" == "C2221")) then
	set message = " with screw along \$temp axis"
    endif
    if("\$newSG" == "P21212") then
	set message = " with non-screw along \$temp axis"
    endif
endif

if((! \$?REINDEX) && ("\$newSG" != "")) set REINDEX

if(\$?REINDEX) then
    if("\$newSG" != "") set newSG = "SYMM \$newSG"

    # reindex the input file to the new space group
    reindex HKLIN \$infile HKLOUT \${tempfile}sortme.mtz << EOF-reindex
\$newSG
\$REINDEX
EOF-reindex
    
    # sort it too, for good measure
    sortmtz HKLIN \${tempfile}sortme.mtz HKLOUT \${tempfile}reindexed.mtz << EOF
${USE_VRSET}VRSET -9e+38
H K L M/ISYM BATCH I SIGI
EOF
    
    rm -f  \${tempfile}sortme.mtz
    # scale and merge this file
    set infile = \${tempfile}reindexed.mtz
endif
#
# Extract runs for the desired wavelength
cat \$runfile |\
nawk -v wave="\$wave" '\$2=="wavelength"{p=0} \$0 ~ " " wave " "{p=1} p{print}' |\
cat >! \${tempfile}runlist
#
set temp = \`cat \${tempfile}runlist | wc -l\`
if(\$temp == 0) then
    # default to all runs
    cat \$runfile >! \${tempfile}runlist
endif

goto ReturnFromSetup

EOF-script
chmod a+x $SCRIPT


























Extractor:
############################################################################

 ######  #    #   #####  #####     ##     ####    #####   ####   #####
 #        #  #      #    #    #   #  #   #    #     #    #    #  #    #
 #####     ##       #    #    #  #    #  #          #    #    #  #    #
 #         ##       #    #####   ######  #          #    #    #  #####
 #        #  #      #    #   #   #    #  #    #     #    #    #  #   #
 ######  #    #     #    #    #  #    #   ####      #     ####   #    #

############################################################################
#
# Generate script for extracting an unmerged wavelength from final scaling file
#
############################################################################
# destination of the generated extraction script
set SCRIPT = ${SCRIPT_dir}/extract.com
# keep the previous copy as extract.com.bak, unless FRUGAL is set
if(! $?FRUGAL) then
    if(-e "$SCRIPT") mv ${SCRIPT} ${SCRIPT}.bak
endif

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#  Automatically generated script 
#  for extracting one of $wavenames from an MTZ
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set infile   = ${lscaleMTZ}
set outfile  = unmerged.mtz
set runfile  = $RUNFILE
#
set tempfile = ${tempfile}extract
#
# Default to all data (useful? )
if(! \$?wave) set wave     = "all"
set hires    = $hiRES
#
# read command-line arguments
goto Setup
############################
#
# usage: $SCRIPT [wavename] [high_RES]
#    eg: $SCRIPT $wave_reference $hiRES
#        means extract frames from $wave_reference to $hiRES A
#
############################
ReturnFromSetup:
######################################################################
#
# Now run SCALA (to add partials, not scale data)
#
######################################################################
rm -f \${tempfile}unmerged.mtz >& /dev/null
scala hklin \$infile  hklout \${tempfile}unmerged.mtz \
  scales   \${tempfile}scales \
  rogues   \${tempfile}rogues \
  normplot \${tempfile}norm   \
  anomplot \${tempfile}anom  << EOF-scale
# save disk space
#NODUMP
#ANALYSE NOPLOT
DNAME \$wave
PNAME \$wave

title "unmerged \$wave from $TITLE"

# resolution range
RESOLUTION \$hires $loRES

@\${tempfile}runlist

# global SCALA cards
`cat $RULESFILE`

# treatment of partials	    # default, sum partials, and use result in analysis
#final fulls		    # use fulls only in analysis
#final scale_partials 0.5   # treat partials with fraction recorded > 0.5 as fulls in mergeing

output unmerged original    # don't merge to ASU, but add partials
noscale			    # should apply old scales!
onlymerge
#scales batch

# changing this to "off" will NOT affect output Is
anomalous on

EOF-scale
##############################################################################
if(\$status) then
    echo "SCALA crashed! "
    # check to see if file was produced anyway
    echo "go" | mtzdump hklin \${tempfile}unmerged.mtz >& /dev/null
    if(\$status) then
	# file was no good
	rm -f \${tempfile}unmerged.mtz >& /dev/null
    else
	echo "...but seems to have produced the output data file before it did."
	echo "YOU SHOULD CHECK AND SEE IF THIS IS OKAY! "
    endif
endif

##############################################################################
# take existence of output file as signal of a successful run
if(-e \${tempfile}unmerged.mtz) then
    mv \${tempfile}unmerged.mtz \${outfile}
    echo "unmerged \$wave data is in \${outfile}"
else
    echo "extraction failed!  See above for why."
    exit 9
endif

# clean up after scala
rm -f \${tempfile}rogues        >& /dev/null
rm -f \${tempfile}runlist       >& /dev/null
rm -f \${tempfile}scales        >& /dev/null
rm -f \${tempfile}norm	        >& /dev/null
rm -f \${tempfile}anom	        >& /dev/null

exit

##############################################################################
Setup:
#
# Run List passed down from Scaler Elves
if(! -e "\$runfile" ) then
    # generate file containing list of scala runs
    cat << EOF-runs >! \$runfile
EOF-script
# Splice the current run list into the generated script.  The here-document
# above stops right after the generated line that opens an EOF-runs
# here-document, and the one below resumes with the EOF-runs terminator, so
# the contents of $RUNFILE become the body the generated script writes out
# when its run file is missing.
cat $RUNFILE >> $SCRIPT
cat << EOF-script >> $SCRIPT
EOF-runs
endif
#
# process command-line arguments
foreach arg ( \$* )
    # define the input file
    if(("\$arg" =~ *.mtz)&&(-e "\$arg")) then
	set infile = "\$arg"
    else
    
	# look for wavelength name
	set temp = \`grep "wavelength" \$runfile | grep " \$arg " \`
	if("\$temp" != "") then
	    # must be the wavelength user wants
	    set wave = "\$arg"
	else
	
	    # look for new resolution limit
	    set temp = \`echo "\$arg" | nawk '\$1+0<10 && \$1+0>0.4 {print \$1+0}'\`
	    if("\$temp" != "") then
		# must be the resolution user wants
		set hires = "\$temp"
	    endif
	
	endif
    endif
end
#
# Extract runs for desired wavelength
cat \$runfile |\
nawk -v wave="\$wave" '\$2=="wavelength"{p=0} \$0 ~ " " wave " "{p=1} p{print}' |\
cat >! \${tempfile}runlist
#
set temp = \`cat \${tempfile}runlist | wc -l\`
if(\$temp == 0) then
    echo "WARNING: no wavelength specified for extraction. "
    echo "extracting all data"
    echo "usage: \$0 [$wavenames] [2A]"
    echo ""
    echo "where:"
    echo "$wavenames[1], etc.  is a wavelength name"
    echo "2A is the (optional) resolution cutoff"
    echo ""

    # default to all runs
    cat \$runfile >! \${tempfile}runlist
endif

goto ReturnFromSetup

EOF-script
chmod a+x $SCRIPT


























ScaleitScript:
############################################################################

  ####    ####     ##    #       ######     #     #####
 #       #    #   #  #   #       #          #       #
  ####   #       #    #  #       #####      #       #
      #  #       ######  #       #          #       #
 #    #  #    #  #    #  #       #          #       #
  ####    ####   #    #  ######  ######     #       #

############################################################################
#
# Generate script for combining merged Fs into a single file
#
############################################################################
# destination of the generated scaleit script
set SCRIPT = ${SCRIPT_dir}/scaleit.com
# keep the previous copy as scaleit.com.bak, unless FRUGAL is set
if(! $?FRUGAL) then
    if(-e "$SCRIPT") mv ${SCRIPT} ${SCRIPT}.bak
endif

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#  Automatically generated scaleit script 
#  for combining $wavenames into a single MTZ
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set outfile     = cadded.mtz
set scaledfile  = scaleited.mtz
#
set tempfile = ${tempfile}scaleit
#
#################################################
# use CAD to put all wavelengths side-by-side
EOF-script

cat << EOF-awk >! ${tempfile}.awk
#! $nawk -f
#
#  make a com file for CADing an arbitrary number of items together
#  using the CCP4 program CAD
#
#
BEGIN{
    if(! dir) dir = "./mtz/"
}

{
    print "cad \\\\";

    for(word=1; word<=NF; ++word)
    {
	++n;
	printf "   HKLIN%d %s.mtz \\\\\\n", n, dir \$word;
	
	labin[n] = "LABIN  FILE " n " E1=F       E2=SIGF       E3=DANO        E4=SIGDANO        E5=ISYM\\n";
	labin[n] = labin[n] "CTYP   FILE " n " E1=F       E2=Q          E3=D           E4=Q              E5=Y\\n";
	labin[n] = labin[n] sprintf("LABOUT FILE %d E1=%-7s E2=SIG%-7s E3=DANO%-7s E4=SIGDANO%-7s E5=ISYM%s\\n", n, \$word, \$word, \$word, \$word, \$word);
	
	if((n == 9)||(word == NF))
	{
	    print "HKLOUT \${tempfile}cad.mtz << EOF-cad";
	    print ""
	    print "\\043 rename data after their original wavelength";
	    
	    for(i=1;i<=n;++i)
	    {
		print labin[i];
	    }
	    
	    print "END"
	    print ""
	    print "EOF-cad"
	    print "if(\$status) exit 1"
	    print ""
	    
	    if(word < NF)
	    {
		# not done yet, so set up reinput
		print ""
		print "mv \${tempfile}cad.mtz \${tempfile}cadin.mtz"
		print ""
		print "cad \\\\"
		
		n = 1;
		print "   HKLIN1 \${tempfile}cadin.mtz \\\\";
		
		labin[n] = "LABIN  FILE 1 ALL\\nCTYP   FILE 1 ALL\\nLABOUT FILE 1 ALL\\n"
	    }
	}
    }
}
EOF-awk

# Run the awk program written above on the single line of wavelength names
# and append the CAD commands it prints to the generated script.  The program
# puts at most 9 HKLIN files on one cad command; when names remain it moves
# cad.mtz to cadin.mtz and opens another cad command with that file as HKLIN1.
# NOTE(review): no -v dir=... is passed, so the program falls back to ./mtz/
# for the input files, while the Makefile rules place merged files in
# ${MTZ_dir} - confirm the two always agree.
echo "$wavenames" | nawk -f ${tempfile}.awk >> $SCRIPT
# the awk program is only needed at generation time
rm -f ${tempfile}.awk

cat << EOF-script >> $SCRIPT


#################################################
# shouldn't need any more scaling, 
mv \${tempfile}cad.mtz \${outfile}

# but, Phil's example said to do scaleit.
EOF-script
# One dispatch line per wavelength is appended to the generated script: when
# its first argument equals a wavelength name, the generated script jumps to
# the label of that name.  The single quotes keep $1 unexpanded here so that
# it is evaluated when the generated script runs.
foreach wave ( $wavenames )
    echo 'if("$1" == "'$wave'") goto '$wave  >> $SCRIPT
end
cat << EOF-scriptbit >> $SCRIPT
if("\$1" == "all") set DOALL

# default to scaling reference
if(! \$?DOALL) goto $wave_reference

EOF-scriptbit

# make a scaleit run for each wave as a reference
# Each pass appends one labelled section to the generated script: the label
# named after the reference wavelength, a scaleit command with its keyword
# input, the LABIN card built by nawk, and the closing cards.  In the
# generated script a section jumps to cleanup unless DOALL is set, in which
# case execution falls through into the next section.
foreach ref ( $wavenames )

    cat << EOF-wave >> $SCRIPT

${ref}:
#################################################
# $ref as reference
scaleit HKLIN \${outfile}  HKLOUT \$scaledfile << EOF-scaleit

TITLE Scale $ref to rest.
RESO $hiRES $loRES		# Usually better to exclude lowest resolution data
WEIGHT			#   Sigmas should be reliable.
refine anisotropic	# use an anisotropic B-factor
#norefine		# don't change relative scale

EOF-wave

#    # scale ref against everything but ref
#    echo "$wavenames" |\
#    nawk '{for(i=1;i<=NF;++i){print $i}}' |\
#    nawk -v wave=$ref 'wave == $NF{print "LABIN FP=" $NF "    SIGFP=SIG" $NF " -"} \
#	wave != $NF {++n; F[n] = $NF} END{for(i=1;i<=n;++i){\
#	printf "    FPH%d=%s SIGFPH%d=SIG%s DPH%d=DANO%s SIGDPH%d=SIGDANO%s ", i, F[i], i, F[i], i, F[i], i, F[i]; \
#	if(i != n) printf "-";\
#	print "";}}' >> $SCRIPT 

    # scale ref against everything (including ref)
    # The first nawk puts one wavelength name per line.  The second prints the
    # LABIN FP/SIGFP line as soon as it reads the reference name, collects every
    # name, and at END prints one FPHn/SIGFPHn/DPHn/SIGDPHn line per name, each
    # ending in a continuation dash except the last.
    echo "$wavenames" |\
    nawk '{for(i=1;i<=NF;++i){print $i}}' |\
    nawk -v wave=$ref 'wave == $NF{print "LABIN FP=" $NF "    SIGFP=SIG" $NF " -"} \
	{++n; F[n] = $NF} END{for(i=1;i<=n;++i){\
	printf "    FPH%d=%s SIGFPH%d=SIG%s DPH%d=DANO%s SIGDPH%d=SIGDANO%s ", i, F[i], i, F[i], i, F[i], i, F[i]; \
	if(i != n) printf "-";\
	print "";}}' >> $SCRIPT 

    # close the scaleit input and record a failure in BAD
    cat << EOF-wave >> $SCRIPT
CONV ABS 0.0001 TOLR 0.000000001 NCYC 4
END
EOF-scaleit
if(\$status) set BAD

if(! \$?DOALL) goto cleanup

EOF-wave

end

cat << EOF-script >> $SCRIPT
cleanup:
# clean up?


if(! \$?BAD) then
    echo "$wavenames are now all in \$outfile"
else
    echo "scaleit failed."
    exit 9
endif

EOF-script
chmod a+x $SCRIPT



































Makefile:
###############################################################################

#     #
##   ##    ##    #    #  ######  ######     #    #       ######
# # # #   #  #   #   #   #       #          #    #       #
#  #  #  #    #  ####    #####   #####      #    #       #####
#     #  ######  #  #    #       #          #    #       #
#     #  #    #  #   #   #       #          #    #       #
#     #  #    #  #    #  ######  #          #    ######  ######

###############################################################################
#
#	Create a Makefile for restarting Scaler procedures after a crash
#
###############################################################################

# cache filenames for use as dependencies
#
# Every list below is built the same way: the wavelength names are echoed with
# the target directory appended as the LAST word, and awk prefixes that
# directory to each name (the loops stop before NF so the directory itself is
# not treated as a wavelength).
# The trailing nawk keeps names only while the running total of their lengths
# stays below MAXLINE; names past that point are dropped from the list.
# NOTE(review): MAXLINE is defined outside this section - presumably a
# command-line length limit; confirm.

# merged MTZ file of each wavelength
set MERGEfiles = `echo $wavenames $MTZ_dir | nawk '{for(i=1;i<NF;++i){printf $NF "/" $i ".mtz\n" }}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
# SOLVE files: all the unmerged .fmt names first, then all the merged ones
echo $wavenames $SOLVE_dir |\
nawk '{for(i=1;i<NF;++i){printf $NF "/" $i ".unmerged.fmt\n" };\
       for(i=1;i<NF;++i){printf $NF "/" $i ".fmt\n" }}' |&\
cat >! ${tempfile}SOLVEfiles 
set SOLVEfiles = `nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}' ${tempfile}SOLVEfiles`
rm -f ${tempfile}SOLVEfiles >& /dev/null
# SHELX anomalous file of each wavelength
set SHELXafiles = `echo $wavenames $SHELX_dir | nawk '{for(i=1;i<NF;++i){printf $NF "/" $i "_ano.hkl\n" }}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
# SHELX isomorphous files: one per pair of wavelengths, later name first
# (empty when there is only one wavelength)
set SHELXifiles = `echo $wavenames $SHELX_dir | nawk '{for(i=1;i<NF;++i){for(j=1;j<i;++j){printf $NF "/" $i "-" $j ".hkl\n" }}}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
# X-PLOR anomalous file of each wavelength
set XPLORfiles = `echo $wavenames $XPLOR_dir | nawk '{for(i=1;i<NF;++i){printf $NF "/" $i "_anom.fobs\n" }}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
# third word of every line of sort_everything.com that starts with rebatch
# (presumably the raw input files - confirm against that script)
set RAWfiles = `cat ${SCRIPT_dir}/sort_everything.com | nawk '/^rebatch/{print $3}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
# Targets produced by revise.com (FM Patterson map and fm.hkl).
# don't run revise for single-wavelength data: both names are left empty then
if($#wavelengths >= 2) then
    set REVISE_Patt = ${MAP_dir}/FM_Patt.map
    set REVISE_hkl  = ${SHELX_dir}/fm.hkl
else
    set REVISE_Patt = ""
    set REVISE_hkl  = ""
endif

cat << EOF >! Makefile
#SHELL = /bin/csh

# aliases for common targets

all: SHARP SOLVE SHELX XPLOR Pattersons

frugal:
	@ echo "clearing out large intermediate files"
	rm -f ${rawMTZ} ${sortMTZ} ${rscaleMTZ} ${lscaleMTZ} ${LOG_dir}/*.old*

SHARP: ${finalMTZ}
	@ echo "${finalMTZ} is ready for input into SHARP or mlphare."

SOLVE: $SOLVEfiles
	@ echo "${SOLVE_dir}/*.fmt are ready for input into SOLVE"

SHELX: ${SHELX_dir}/ano.hkl ${SHELX_dir}/iso.hkl ${SHELX_dir}/fh.hkl $REVISE_hkl
	@ echo "${SHELX_dir}/*.hkl are ready for input into SHELX"

XPLOR: $XPLORfiles
	@ echo "${XPLOR_dir}/*.fobs are ready for input into XPLOR's mad_merge.inp"

Pattersons: ${MAP_dir}/FH_Patt.map $REVISE_Patt
	@ echo "Pattersons are ready in ${MAP_dir}"

##############################################################################
# Actual file targets for sorting, scaling, and localscaling

# sort all raw wavelength data together (and scale reference set)
${rawMTZ}: ${SCRIPT_dir}/sort_everything.com $RAWfiles
	@ if test -f $sortLOG; then mv $sortLOG ${sortLOG}.old ; fi
	@ echo "sorting raw data ..."
	@ ${SCRIPT_dir}/sort_everything.com > $sortLOG

EOF

# The rule that builds ${refMTZ} from the raw data is only written when
# USER_REFERENCE is not set; with it set, the Makefile gets no rule for
# that file (presumably the user supplied it - confirm where it is set).
if(! $?USER_REFERENCE) then
    cat << EOF >> Makefile
# scale and merge the reference set
${refMTZ}: ${rawMTZ} ${SCRIPT_dir}/make_reference_set.com 
	@ if test -f $refLOG; then mv $refLOG ${refLOG}.old ; fi
	@ echo "prescaling reference set (${wave_reference}) ... (see $refLOG)"
	@ ${SCRIPT_dir}/make_reference_set.com ${rawMTZ} ${EXTRA_ARGS} > $refLOG

EOF
endif

cat << EOF >> Makefile
# import the reference set into the raw data file
${sortMTZ}: ${refMTZ} ${rawMTZ} ${SCRIPT_dir}/import_reference.com 
	@ if test ! -f $refLOG; then echo "" > $refLOG ; fi
	@ echo "importing the reference set ... (see $refLOG)"
	@ ${SCRIPT_dir}/import_reference.com ${refMTZ} $refMTZset FreeR_flags ${rawMTZ} ${EXTRA_ARGS} >> $refLOG

# rough scaling
${rscaleMTZ}: ${sortMTZ} ${SCRIPT_dir}/rough_scale.com $RUNFILE
	@ if test -f $rscaleLOG; then mv $rscaleLOG ${rscaleLOG}.old ; fi
	@ echo "$SCALING scaling everything to ${refMTZ}.  (see $rscaleLOG)"
	@ ${SCRIPT_dir}/rough_scale.com ${sortMTZ} ${EXTRA_ARGS} | tee ${rscaleLOG} | egrep "Cycle|shift"

# smooth localscaling
${lscaleMTZ}: ${rscaleMTZ} ${SCRIPT_dir}/localscale.com $RUNFILE
EOF
# Append the recipe for the ${lscaleMTZ} target, whose rule header is the last
# line written by the here-document just above.
# With ROUGHSCALE_ONLY set, the target becomes a symbolic link to the
# rough-scaled file (the basename command is run now, while the Makefile is
# being written).  Otherwise localscale.com is run, its output saved in the
# log and only the Cycle/shift lines shown on the terminal.
if($?ROUGHSCALE_ONLY) then
    cat << EOF >> Makefile
	@ echo "localscaling disabled"
	@ ln -sf `basename ${rscaleMTZ}` ${lscaleMTZ}
EOF
else
    cat << EOF >> Makefile
	@ if test -f $lscaleLOG; then mv $lscaleLOG ${lscaleLOG}.old ; fi
	@ echo "localscaling: this can take a long time.  (see $lscaleLOG)"
	@ ${SCRIPT_dir}/localscale.com ${rscaleMTZ} ${EXTRA_ARGS} | tee ${lscaleLOG} | egrep "Cycle|shift"
EOF
endif
cat << EOF >> Makefile
	@ echo ""

###############################################################################

# merge individual wavelengths from final scaled file
EOF
# One rule per wavelength: the previous log is renamed to .old, merge.com is
# run on the localscaled file for that wavelength, and the merged.mtz it
# leaves behind is moved to ${MTZ_dir} under the wavelength name.
foreach wave ( $wavenames )
cat << EOF >> Makefile
${MTZ_dir}/${wave}.mtz: ${lscaleMTZ} ${SCRIPT_dir}/merge.com
	@ if test -f ${mergeLOG}${wave}.log; then mv ${mergeLOG}${wave}.log ${mergeLOG}${wave}.log.old ; fi
	@ echo "mergeing $wave as ${MTZ_dir}/${wave}.mtz ...  (see ${mergeLOG}${wave}.log)"
	@ ${SCRIPT_dir}/merge.com ${wave} ${lscaleMTZ} > ${mergeLOG}${wave}.log
	@ mv merged.mtz ${MTZ_dir}/${wave}.mtz

EOF
end
# figure out where Free-R flags should come from
# an empty setting falls back to 5%
if("$freeR_source" == "") then
    set freeR_source = "5%"
endif
# so does a setting that neither ends in % nor names an existing file
if(("$freeR_source" !~ *"%")&&(! -e "$freeR_source")) then
    set freeR_source = "5%"
endif

cat << EOF >> Makefile
###############################################################################
# collate scaled and merged wavelengths together (and make free-R flags)
${finalMTZ}: $MERGEfiles ${SCRIPT_dir}/scaleit.com
	@ if test -f $finalLOG; then mv $finalLOG ${finalLOG}.old ; fi
	@ echo ""
	@ echo "combining $wavenames into ${finalMTZ} ... (see ${finalLOG})"
	@ ${SCRIPT_dir}/scala_summary.com ${mergeLOG}*.log
	@ ${SCRIPT_dir}/scaleit.com > ${finalLOG}
	@ mv scaleited.mtz ${finalMTZ}
	@ rm -f cadded.mtz
	@ echo "adding $freeR_source Free-R flags ..."
	@ ${SCRIPT_dir}/FreeRer.com ${finalMTZ} $freeR_source >> ${finalLOG}
	@ mv FreeRed.mtz ${finalMTZ}
	@ mv freeR_flag.mtz ${MTZ_dir}
	@ mv XPLOR.cv ${XPLOR_dir}/freeR.cv
	@ echo ""
	@ echo "True completeness to $hiRES A:"
	@ ${SCRIPT_dir}/mtz_sum.com $hiRES ${finalMTZ}
	@ echo ""
	@ ${SCRIPT_dir}/scaleit_sum.com ${finalLOG} >> ${finalLOG}
	@ echo "look at end of ${finalLOG} for Dano and Diso vs $wave_reference"

###############################################################################
# creation of Patterson maps

# calculate "best" FH Patterson (and update shelx data)
${MAP_dir}/FH_Patt.map: ${finalMTZ} ${SCRIPT_dir}/bestFH.com
	@ ${SCRIPT_dir}/bestFH.com ${finalMTZ} > ${LOG_dir}/bestFH.log
	@ cat bestFH.log >> ${LOG_dir}/bestFH.log 
	@ rm -f bestFH.log 2> /dev/null
	@ mv fh.hkl ${SHELX_dir}/fh.hkl 2> /dev/null
	@ mv FH.mtz ${MTZ_dir}/bestFH.mtz 2> /dev/null
	@ mv FH_Patt.map ${MAP_dir}/FH_Patt.map 2> /dev/null
#	@ mv wFH_Patt.map ${MAP_dir}/ 2> /dev/null

EOF

# don't run revise for single-wavelength data
# (REVISE_Patt is set to the empty string earlier in this section when there
# are fewer than two wavelengths, so no rule for FM_Patt.map is written then)
if("$REVISE_Patt" != "") then
    cat << EOF >> Makefile
# calculate "revised" FM Patterson (and update shelx data)
${MAP_dir}/FM_Patt.map: ${finalMTZ} ${SCRIPT_dir}/revise.com
	@ ${SCRIPT_dir}/revise.com ${finalMTZ} > ${LOG_dir}/revise.log
	@ mv fm.hkl ${SHELX_dir}/fm.hkl 2> /dev/null
	@ mv FM.mtz ${MTZ_dir}/revisedFM.mtz 2> /dev/null
	@ mv FM_Patt.map ${MAP_dir}/FM_Patt.map 2> /dev/null

EOF
endif
cat << EOF >> Makefile
###############################################################################
# conversion to other programs formats

# SOLVE
EOF

# Two rules per wavelength:
#  - the merged .fmt file, converted from the merged MTZ by mtz2SOLVE.com
#  - the unmerged .fmt file, for which extract.com first writes unmerged.mtz
#    from the localscaled file; that file is converted and then removed
# Both recipes pick up the SOLVE.fmt left by mtz2SOLVE.com and move it into
# ${SOLVE_dir} under the wavelength name.
foreach wave ( $wavenames )
cat << EOF >> Makefile
${SOLVE_dir}/${wave}.fmt: ${MTZ_dir}/${wave}.mtz ${SOLVE_dir}/mtz2SOLVE.com
	@ ${SOLVE_dir}/mtz2SOLVE.com ${MTZ_dir}/${wave}.mtz > ${extractLOG}SOLVE_${wave}.log
	@ mv SOLVE.fmt ${SOLVE_dir}/${wave}.fmt

${SOLVE_dir}/${wave}.unmerged.fmt: ${lscaleMTZ}  ${SCRIPT_dir}/extract.com  ${SOLVE_dir}/mtz2SOLVE.com
	@ ${SCRIPT_dir}/extract.com ${wave} ${lscaleMTZ} > ${extractLOG}unmerged_SOLVE_${wave}.log
	@ ${SOLVE_dir}/mtz2SOLVE.com unmerged.mtz >> ${extractLOG}unmerged_SOLVE_${wave}.log
	@ rm -f unmerged.mtz > /dev/null
	@ mv SOLVE.fmt ${SOLVE_dir}/${wave}.unmerged.fmt

EOF
end

echo "###############################################################################" >> Makefile
echo "# SHELX" >> Makefile

cat << EOF >> Makefile

# these will be made when the Pattersons are made
${SHELX_dir}/fh.hkl: ${MAP_dir}/FH_Patt.map
${SHELX_dir}/fm.hkl: ${MAP_dir}/FM_Patt.map

# pick best SHELX data files
${SHELX_dir}/ano.hkl: $SHELXafiles
	@ ${SHELX_dir}/best_signal.com ${SHELX_dir}/*_ano.hkl
	@ mv best.hkl ${SHELX_dir}/ano.hkl

${SHELX_dir}/iso.hkl: $SHELXifiles
EOF

# Append the recipe for the iso.hkl target, whose rule header is the last line
# written by the here-document just above.  When there are no pairwise
# isomorphous files (SHELXifiles is empty) the rule is left without
# prerequisites and without a recipe.
if("$SHELXifiles" != "") then
cat << EOF >> Makefile
	@ ${SHELX_dir}/best_signal.com ${SHELX_dir}/*-*.hkl
	@ mv best.hkl ${SHELX_dir}/iso.hkl
EOF
endif

# One rule per anomalous SHELX file.  The wavelength name is recovered from
# the file name by stripping the directory and the _ano.hkl suffix (this
# overwrites the wave variable).  The recipe converts the merged MTZ with
# mtz2SHELX.com, moves SHELX_ano.hkl to the target and appends the target
# name to the same log.
foreach file ( $SHELXafiles )
set wave = `basename $file _ano.hkl`
cat << EOF >> Makefile
${file}: ${MTZ_dir}/${wave}.mtz ${SHELX_dir}/mtz2SHELX.com
	@ ${SHELX_dir}/mtz2SHELX.com ${MTZ_dir}/${wave}.mtz > ${extractLOG}SHELX-${wave}_ano.log
	@ mv SHELX_ano.hkl $file
	@ echo $file >> ${extractLOG}SHELX-${wave}_ano.log
	
EOF
end

# SHELXifiles can blow up line size with high # of wavelengths, so use nawk here
#
# Writes one Makefile rule per pair of wavelengths (Wi, Wj) where Wj comes
# before Wi in $wavenames.  The SHELX directory travels as the last word of
# the input line, so the loops stop before NF.  Each rule is:
#   target : <SHELX_dir>/Wi-Wj.hkl
#   needs  : <MTZ_dir>/Wi.mtz <MTZ_dir>/Wj.mtz
#   recipe : run mtz2SHELX.com on both files, move SHELX_iso.hkl to the
#            target, then append the target name to the log
# Both log references must name the same file, <extractLOG>SHELX-Wi-Wj.log,
# so the last printf carries one %s for Wi and one for Wj (with a single %s
# the echo went to SHELX-Wi.log and Wj was silently ignored).
echo $wavenames $SHELX_dir |\
 nawk -v xlog=$extractLOG -v mtz=$MTZ_dir '{for(i=1;i<NF;++i){for(j=1;j<i;++j){\
        file = $NF "/" $i "-" $j ".hkl ";\
	print file ": " mtz "/" $i ".mtz " mtz "/" $j ".mtz ";\
	printf "\t@ %s/mtz2SHELX.com %s/%s.mtz %s/%s.mtz > ", $NF, mtz, $i, mtz, $j;\
	printf "%sSHELX-%s-%s.log\n", xlog, $i, $j;\
	printf "\t@ mv ./SHELX_iso.hkl %s \n", file;\
	printf "\t@ echo %s >> %sSHELX-%s-%s.log\n\n", file, xlog, $i, $j;\
       }}}' >> Makefile


echo "###############################################################################" >> Makefile
echo "# X-plor" >> Makefile

# One rule per wavelength: mtz2XPLOR.com converts the merged MTZ and the
# XPLOR.fobs it leaves behind is moved into ${XPLOR_dir} under the
# wavelength name.
foreach wave ( $wavenames )
cat << EOF >> Makefile
${XPLOR_dir}/${wave}_anom.fobs: ${MTZ_dir}/${wave}.mtz
	@ ${XPLOR_dir}/mtz2XPLOR.com ${MTZ_dir}/${wave}.mtz > ${extractLOG}XPLOR-${wave}.log
	@ mv XPLOR.fobs ${XPLOR_dir}/${wave}_anom.fobs

EOF
end



#
# Make a README file
set SCRIPT = ./README.Scaler
cat << EOF-README >! $SCRIPT

Scaler Elves guide to their scripts

1) In a rush?  Just type this:

make 

That's it.  

Your scaled and merged data will end up in ${finalMTZ}, in CCP4/SHARP format, 
and will also be converted into SOLVE, SHELX, and X-PLOR formats.



2) Okay, how does it work?

    All the really important stuff is in ${SCRIPT_dir}, and that directory
has its own README file.

In this directory (`pwd`):

################################################################################
Makefile		- "make" file for your project

    usage: make [target]
    where: [target] is the thing (file) you want to create or update
    
    example: make SOLVE

    will create data files for input into Tom Terwilliger's "solve" program

    The unix program "make" is a wonderful tool for maintaining a complex, 
    interdependent system of files (like source code, or x-ray data processing).
    The Makefile contains a list of instructions and rules, like this:
    
somefile.mtz: someotherfile.mtz script.com
	script.com >! logfile.log

    Translated: 
    To create somefile.mtz, you need someotherfile.mtz and script.com.  If
    somefile.mtz is missing, or if someotherfile.mtz or script.com are newer 
    (by file date stamp) than somefile.mtz, then execute the command:
    "script.com >! logfile.log".  

    This way, if you edit script.com or change someotherfile.mtz, "make" knows 
    that somefile.mtz needs to be updated.  The really neat part of it all is 
    that the Makefile can also contain a similar entry for how to make 
    someotherfile.mtz, using a different script, and "make" will then know 
    it needs to update somefile.mtz after it has updated someotherfile.mtz.
    The list can go on and on.
    Simply put, typing "make" will update your project, making use of any
    changes you have made to the scripts, and frees you from keeping track
    of which scripts to run in which order.
    
    To "defeat" make, and keep it from doing processing you don't want it to, 
    use "touch".  If you type: "touch ${rscaleMTZ}", then, as far
    as make is concerned, you just ran ${SCRIPT_dir}/rough_scale.com
    etc...

################################################################################
$RUNFILE		- customizable run list 

    Your data are divided into "runs" of contiguous frames
    Run-specific scala commands can be put in here, i.e.
    RESOLUTION run 1 5.0 
    for a 5A cutoff on run 1

################################################################################
$RULESFILE		- custom SCALA commands used in all Scaler Elves scripts

    Note: unlike ${RUNFILE}, $RULESFILE is not dynamically loaded by the
    scripts in ${SCRIPT_dir}.  However, you can present this file to Scaler
    Elves the next time you invoke them, and they will include any scala cards
    they find there into their scala scripts.
    
################################################################################



3) What if something goes wrong?

    The Scaler elves have been trained to handle a number of common problems 
encountered in their localscaling procedure.  However, if something happens that
is beyond their experience, it's up to you to figure it out.  :(  But, please
email jamesh@ucxray6.berkeley.edu about your problem.

    The scripts you will need to look at are all in ${SCRIPT_dir},  and are run
(pretty much) in the following order:
${SCRIPT_dir}/sort_everything.com    - sorts all raw data together
${SCRIPT_dir}/make_reference_set.com - creates a small, reference dataset from a selected wavelength
${SCRIPT_dir}/import_reference.com   - imports the reference dataset into the raw, unscaled mtz
${SCRIPT_dir}/rough_scale.com        - does one-scale-per-frame scaling
${SCRIPT_dir}/localscale.com         - performs 3D localscaling
${SCRIPT_dir}/merge.com wave         - merges "wave", do this for each of: $wavenames
${SCRIPT_dir}/merge.com ...etc.
${SCRIPT_dir}/scaleit.com            - combines all wavelengths into one file

the file ${SCRIPT_dir}/README explains all this in more detail

    Whether you have problems or not, when you're ready to start tinkering around 
(editing the scripts, trying other space groups, etc.), have a look in ${SCRIPT_dir}, 
${SHELX_dir}, ${SOLVE_dir}, and ${XPLOR_dir}.  There are README files there too.


EOF-README


# Choose the file name for the guide to the generated scripts.
# grep returns status 0 when an existing README contains the text
# Scaler Elves; in that case the guide goes to README.Scaler instead.
# A missing README, or one without that text, is (over)written.
# NOTE(review): this looks inverted if the aim is to protect a README written
# by another program - confirm the intended direction of the test.
set SCRIPT = "${SCRIPT_dir}/README"
grep "Scaler Elves" $SCRIPT >& /dev/null
if(! $status) set SCRIPT = ${SCRIPT_dir}/README.Scaler
cat << EOF-README >! $SCRIPT

Scaler Elves guide to their scripts



What am I supposed to do with all these scripts!?


################################################################################
sort_everything.com	- script for combining and sorting all raw data into one file
    reads: $RAWfiles	    
    makes: ${rawMTZ}
    usage: ${SCRIPT_dir}/sort_everything.com [SG]
    where: [SG] is the (optional) new space group

    example: ${SCRIPT_dir}/sort_everything.com $SG >! $sortLOG
	
	Will sort all raw data and reindex it to $SG
    (Note: the reindexing done here is at the mercy of the CCP4 program "reindex", 
    it is always safer to change your indices in the integration program)
    For asymmetric orthorhombic space groups (P2221, P21212 and C2221) you can
    also specify "pseudo" space groups like "P2212" to indicate P2221 with the 
    screw axis along "b" etc.
    
################################################################################
make_reference_set.com	- scale and merge the reference data set.
    reads: ${rawMTZ}
    makes: ${refMTZ}
    usage: ${SCRIPT_dir}/make_reference_set.com [infile.mtz]

    example: ${SCRIPT_dir}/make_reference_set.com >! $refLOG

    This script creates a "pre-merged" reference data set out of one of 
    your wavelengths.  This reference set will be used by SCALA as a guide 
    for scaling all raw data.  It exists solely to stabilize the scaling run, 
    and is not included in the final mergeing step.  Mergeing of wavelength used
    to make this reference set will follow the same procedure as the rest of 
    the wavelengths.
    
    Note: this is not, necessarily the reference set you will be using in
     mlphare, etc.

################################################################################
import_reference.com  - combine the reference data set with ${rawMTZ}.
    reads: ${rawMTZ} ${refMTZ}
    makes: ${sortMTZ}
    usage: ${SCRIPT_dir}/import_reference.com reference.mtz [infile.mtz]

    example: ${SCRIPT_dir}/import_reference.com ${refMTZ} >> $refLOG

    This script imports an arbitrary reference data set into your scaling
    run.  This can be almost any set of unique data.  By default, Scaler
    Elves will make a reference dataset from your most complete wavelength
    using the script above.
    You could also use a calculated dataset from your final, refined structure
    as ${refMTZ} here.  This would have the effect of traditional FC-directed 
    absorption corrections in localscaling (below).  By default, the
    Free-R flagged HKLs will be excluded, so as not to bias scaling of Fobs to your
    final Fc.  This allows you to use the free R to see if this absorption 
    correction did you any good.

################################################################################
rough_scale.com		- first round of all-data scaling
    reads: ${sortMTZ} $RUNFILE
    makes: ${rscaleMTZ}
    usage: ${SCRIPT_dir}/rough_scale.com [cycles] spacing [spacing] filter [infile]

    where: [cycles] is the number of scaling cycles you want (default: 50)
	   [spacing] is the Bfactor smoothing window, in degrees (default: 10)
	   [infile] is the input mtz file (default: ${sortMTZ})
	   filter turns on the "eigenvalue filter"
    
    example: ${SCRIPT_dir}/rough_scale.com 5   spacing 10   filter

    This script scales all data (guided by the "pre-merged" reference) using
    one scale factor per frame, but requiring that the B-factor vary smoothly
    over all frames.  This script mainly serves to remove large discontinuities
    in scale that would crash localscale.com.  If your runs have no discontinuities
    (no fills, inverse beam, etc.),  then you can skip rough_scale.com	   

    If $RUNFILE is missing, it will be regenerated by rough_scale.com
    
################################################################################
localscale.com		- second round of all-data scaling (3D scales)
    reads: ${rscaleMTZ} $RUNFILE
    makes: ${lscaleMTZ}
    usage: ${SCRIPT_dir}/localscale.com [cycles] spacing [spacing] filter [infile]

    where: [cycles] is the number of scaling cycles you want (default: 50)
	   [spacing] is the scale smoothing window, in degrees (default: 10)
	   [infile] is the input mtz file (default: ${rscaleMTZ})
	   filter turns on the "eigenvalue filter"

    example: ${SCRIPT_dir}/localscale.com 50    ${sortMTZ}

    This script scales all data, guided by the "pre-merged" reference, and 
    building on the scales obtained by rough_scale.com in a "3D" localscaling
    procedure.  Scale factors are required to vary smoothly within each "run".
    B-factors are not refined, but instead, the scale is allowed to vary (smoothly) 
    across the detector face.  Combined with the smooth scaling over frames, this
    has the effect of assigning a smoothly-varying scale to every point in the
    observed reciprocal space, and, hence, localscaling.  This is the MAD scaling
    procedure recommended by Phil Evans in the SCALA documentation, and JMH has
    found it to improve Rmerge significantly.
    
    If $RUNFILE is missing, it will be regenerated by localscale.com

################################################################################
merge.com		- mergeing utility
    reads: ${lscaleMTZ} $RUNFILE
    makes: merged.mtz
    usage: ${SCRIPT_dir}/merge.com [wave] [RESO] [SG]

    where: [wave] is the wavelength name to merge (default: all of them)
           [RESO] is the high-resolution cutoff (default: $hiRES)
	   [SG] is the space group, reindexed using "reindex" (default: $SG)
    
    example: ${SCRIPT_dir}/merge.com $wave_reference $hiRES $otherSGs[$#otherSGs]

    This script merges all data from the provided wavelength.  No scaling is done, 
    so you should use a scaled MTZ.  merge.com "knows" which wavelength is which
    from the information in $RUNFILE
 		    
    If $RUNFILE is missing, it will be regenerated by merge.com

################################################################################
extract.com		- non-mergeing wavelength extractor
    reads: ${lscaleMTZ} $RUNFILE
    makes: unmerged.mtz
    usage: ${SCRIPT_dir}/extract.com wave [RESO] [SG]

    where: wave is the wavelength name to extract (required)
           [RESO] is the high-resolution cutoff (default: $hiRES)
	   [SG] is the space group, reindexed using "reindex" (default: $SG)
    
    example: ${SCRIPT_dir}/extract.com $wave_reference $hiRES

    This script works pretty much the same as merge.com, except it does
    not merge equivalent reflection data.  HOWEVER, it does add partials.
    extract.com serves mainly to migrate scaled, but unmerged reflection
    data to another scaling program (such as SOLVE's localscaling procedure).

    If $RUNFILE is missing, it will be regenerated by extract.com

################################################################################
scaleit.com		- place merged data in a multicolumn MTZ file
    reads: ${lscaleMTZ} $RUNFILE
    makes: ${finalMTZ}
    usage: ${SCRIPT_dir}/scaleit.com [$wave_reference]
			    
    where:  [$wave_reference] is the wavelength name to use as a reference
	    (one of: $wavelengths)

	This script combines each of the files produced by merge.com into
    a single, multi-column mtz file.  This is the file you should use for
    SHARP and mlphare.
	If you type "${SCRIPT_dir}/scaleit.com all", scaleit.com will do a 
    scaleit run on each of $wavenames in turn.
    
    

Utilities:

################################################################################
scaleit_sum.com		- summarize Diso and Dano
    reads: scaleit logs
    makes: an xloggraph plot (to screen)
    usage: ${SCRIPT_dir}/scaleit_sum.com ${LOG_dir}scaleit.log

	This little jiffy serves primarily in edge walking.  It gives you
    a quick plot of Dano and Diso (relative to the reference) vs. x-ray energy.

################################################################################
mtz_sum.com		- summarize an MTZ file
    reads: merged MTZ files
    makes: a nice table of completeness and <F>/<sigF>
    usage: ${SCRIPT_dir}/mtz_sum.com mtzfile.mtz [RESO]
    
    example: ${SCRIPT_dir}/mtz_sum.com $hiRES ./mtz/all.mtz

	Prints out completeness and F/sigF for every F in the mtz file provided.
    
################################################################################
scala_summary.com	- summarize mergeing results from one or more scala logs
    reads: scala/truncate log files
    makes: a nice table of Rmerge Ranom I/sigma Completeness Multiplicity and Wilson B
    usage: ${SCRIPT_dir}/scala_summary.com scala.log [otherscala.log ... ]

    example: ${SCRIPT_dir}/scala_summary.com ./logs/merge_*

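	Prints out a table of Rmerge, Ranom, I/sigma, completeness, multiplicity
	and Wilson B taken from the log file(s) provided.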

################################################################################
FreeRer.com		- add/inherit Free-R flags
    reads: one or two mtzs (or one mtz and an x-plor file)
    makes: FreeRed.mtz, FreeR_flag.mtz and XPLOR.cv
    usage: ${SCRIPT_dir}/FreeRer.com mtzfile.mtz [free-R source] [fraction[%]]
    where: mtzfile.mtz is the file you want to ADD Free-R flags to
           [free-R source] is the file you want to get the flags from (mtz or X-plor)
	   [fraction[%]] is the fraction of spots to put in the free-R set (default: 10%)

    example: ${SCRIPT_dir}/FreeRer.com ${finalMTZ} /some/random/place/xplor/olddata.cv
    
    FreeRed.mtz, FreeR_flag.mtz and XPLOR.cv will always be made, and they contain equivalent 
    representations of the Free-R set.  FreeR_flag.mtz, however will contain Free-R assignments 
    extending out to 1.5A.  That way, FreeR_flag.mtz can be used to assign the Free-R set from 
    future crystals (which might diffract better).
    If no [free-R source] is given, the Free-R flags will be made up (as in uniqueify).  However, 
    if a second file is given (mtz or X-PLOR format) The Free-R flags will be taken from it.  
    Any "holes" in an externally-obtained set (I.E. missing HKLs) will be filled in as described
    in the CCP4 documentation.  
    The given example will produce a file called FreeRed.mtz that contains the Free-R flags used 
    in /some/random/place/xplor/olddata.cv.  
    
################################################################################
bestFH.com	- Matthews "best" FH estimator

    input:  all.mtz	- a cad-ed mtz file with multiple data sets
    output: FH.mtz	- an mtz containing only the estimate of FH
            fh.hkl	- shelx version of FH.mtz
	    FH_Patt.map - a Patterson map of FH
	    FH_Four.map - phased map of FH (if a phase is in all.mtz)
	   
    usage: ${SCRIPT_dir}bestFH.com all.mtz [Fset] [Dset] [1.8A]
    where: 
    all.mtz    contains same-site reflection data      (default: mtz/all.mtz)
    Fset       are the sets of Fs you want to use      (default: all of them)
    Dset       are the sets of Danos you want to use   (default: all of them)
    1.8A       is the desired outer resolution limit   (default: all data)
    PHI        is the phase set you want to use	       (default: most recent phase)
    
    FH_Patt.map is calculated with a 4*rms(FH) cutoff, as calculated by scaleit.
    
    example1: ${SCRIPT_dir}bestFH.com mtz/all.mtz
	will calculate an estimate of FH from all the difference data in 
	mtz/all.mtz.
	
    example2: ${SCRIPT_dir}bestFH.com dmed.mtz
	will calculate an estimate of FH from all the difference data in 
	mtz/all.mtz. (same as above), but will also calculate a phased map 
	of FH, using the most recently-added phase in dmed.mtz (PHIDM).  
	This is usually superior to ordinary difference Fouriers for finding 
	new heavy-atom sites.
    
    example3: ${SCRIPT_dir}bestFH.com mtz/all.mtz no DANOFlo Flo
	same thing, but leave the "DANOFlo" difference data-set and "Flo" data
	set out of the calculation.
    
    description:
	This script offers the "new" functionality of computing a "Matthews FH" 
	estimate.  This analysis not only "averages" information from all your 
	difference data into a single data set, but reduces the systematic error
	produced by cross-terms in the subtraction of anomalous and 
	isomorphous difference data: |FH| == |FPH-FP| != |FPH|-|FP|
    
	In bestFH.com, all anomalous difference data are scaled together, 
	and then added (sigma-weighted) together.  Then, all the possible
	isomorphous differences between "F"s in the mtz are subtracted, 
	scaled, and also added together.  Finally, Dano is scaled against
	Diso, and FH is calculated by the Pythagorean theorem.
	
	Care must be taken in the ordering of the "F" datasets.  For example, 
	in a 3-wavelength MAD experiment: Finf Fpeak Fhi should be the order
	used.  Fhi Fpeak Finf is okay too, but not Finf Fhi Fpeak.  The latter
	would result in Finf-Fhi and Fhi-Fpeak "canceling" each other, because
	the f' differences will have opposite signs.  bestFH.com will try to
	get this ordering right, but you should check the difference dataset
	list to make sure none of them are opposing each other.

	Note also that all the data in mtz/all.mtz should be from 
	crystals with metal sites at the same positions, otherwise, FH will 
	be a mix of the two site constellations.  
    
    
################################################################################
reindex.com	- general-purpose re-indexing script

    input:  data.mtz	    - mtz file to re-index (merged or unmerged)
    output: reindexed.mtz   - mtz file with the new space group
    
    examples: 
	${SCRIPT_dir}reindex.com data.P41212.mtz P43212
	  - will change the space group of "data.P41212.mtz" to P43212 
	    (assuming that is possible), and write the results to 
	    "reindexed.mtz"
	${SCRIPT_dir}reindex.com data.P2221.mtz P2122
	  - will change the space group of "data.P2221.mtz" to the "pseudo" 
	    space group "P2122", which is P2221, but with "a" as the screw 
	    axis.  This is done by leaving the mtz file in P2221, but 
	    permuting the cell (and the data) so that the shortest cell edge 
	    (normally "a"), is moved to the third cell parameter (the one 
	    with screw symmetry).
	
    description:
	This is a general utility for changing the assigned space group of 
	mtz data using the CCP4 program "reindex".  It works on merged and 
	unmerged data.  Re-assignment of the screw/rotation axes of 
	anisotropic orthorhombic space groups is supported (see example 2).
	
	"Flipping" between alternative axis assignments is also easily done.
	Just include the word "flip" on the reindex.com command line to switch
	to the "other" axis assignment.  This may be necessary for any space
	group having two or more cell edges exactly the same length.  The only 
	tricky ones are R3 and P3x, which have four possible axis assignments.  
	To specify the remaining two, use the word "flip" two or three times 
	(respectively).  see ${CDOC}/reindexing.doc for details.
	
	Changing between space groups with different point group, or even
	lattice symmetry is allowed, but unadvisable!  These transformations
	involve mergeing or "un-mergeing" spots, which reindex can't do.
    
	Note: moving/removing screw axes will result in the "loss" of 
	some systematic absence reflections, so be careful.  It is probably
	advisable to always merge in P222, and reindex later.
    
################################################################################
SGsearch.com		- exhaustive space-group search
    reads: a scala script
    makes: a table of mergeing statistics
    usage: SGsearch.com [script.com] [raw.mtz] [rootSGs]
    where: 
	script.com is the scala script to use        (default: merge.com)
	raw.mtz    is the raw, unscaled data         (default: raw.mtz)
	rootSGs    is/are the "starting" space group (default: SG from raw.mtz)

    example: SGsearch.com merge.com P212121

	will run merge.com with every orthorhombic space group:
	    P222, P2221, (P2122, P2212), P21212, (P21221, P22121), and P212121

    Picking the wrong space group has been known to waste weeks to years of an 
    investigator's time.  SGsearch.com uses the space group provided to get the
    general crystal system your crystal was indexed with, and will then try 
    mergeing your data in EVERY space group belonging to that crystal system.
    The Rmerge, systematic absences, and asymmetric unit volume will be presented
    in a neat table for your review.  
    
    The actual logs from the individual merge.com runs will be placed in the ./logs/
    directory, named merge.SG.log.  If SGsearch.com finds these logs already exist, 
    it will use the statistics in them to make the table, this usually saves you a 
    lot of time re-generating the table, and you can always delete these logs, and 
    run SGsearch.com again.
    
    SGsearch.com is designed to work with the merge.com provided by Wedger Elves, 
    but should work fine with any scala/truncate script that is capable of 
    accepting and applying a space group provided on its command line.

################################################################################
autoscala		- optimizer for SDCORR card
    reads: a scala script
    makes: a better scala script
    usage: ${SCRIPT_dir}/autoscala script.com
    where: script.com is the \scala script to optimize

    example: ${SCRIPT_dir}/autoscala ${SCRIPT_dir}/merge.com

    Scala's SDCORRECTION card allows the assigned error (sigma) of the spot 
    intensities to be edited.  Most measurement programs cannot predict the
    effects of absorption and other systematic measurement errors, and therefore
    usually give unrealistically low estimates of the error in the measured
    spot intensities.  You should read the scala documentation to find out 
    exactly how SDCORR works.
    Briefly, "correct" sigmas should be similar to the scatter of observed intensities.
    That is, if the 10 observations of hkl=(5,9,12) deviate from the average value
    of (5,9,12) by 100 units (rms), then the sigma of (5,9,12) should be 100.  So, 
    if the assigned sigma is 50, then the scatter/sigma will be 2.  This analysis, 
    grouped by intensity bins, is the last graph in the scala logfile.  You want 
    all the points on this graph to be as close to 1.0 as possible.  If you see this, 
    then your assigned sigmas are probably realistic.
    To save you from hours of diddling with the SDCORR numbers, autoscala uses a 
    "Golden-Section" search (derived from Numerical Recipes), to optimize the three 
    numbers for scala's SDCORRECTION card, using the deviation of the aforementioned
    graph from 1.0 as a target.  In CCP4 3.3 and beyond, the first number on the SDCORR 
    card is optimized internally (and might as well be "1"), but the remaining two can 
    be tuned up by autoscala.

################################################################################
mtz2various.com		- basic format-converter script
    reads: merged.mtz
    makes: outfile.EXT
	EXT -> FORMAT
	cif -> CIF
	hkl -> shelx
	tnt -> TNT
	fin -> XtalView
	phs -> XtalView
	fobs-> XPLOR
	cv  -> XPLOR
	cns -> CNS
    usage: mtz2various.com merged.mtz outfile.EXT [format]
    where: 
	merged.mtz   is the merged mtz file (containing Fs)
	outfile.EXT  is the filename you want to use for the exported data
	format	     is the (optional) program you want outfile.EXT formatted for

    examples:
	mtz2various.com merged.mtz merged.cif
	  - will convert merged.mtz to CIF format
	mtz2various.com all.mtz "F1" merged.fobs
	  - will convert "F1" in all.mtz to XPLOR format
	mtz2various.com merged.mtz merged.hkl shelx
	  - will convert merged.mtz to shelx format
	mtz2various.com merged.mtz merged.hkl tnt
	  - will convert merged.mtz to TNT format

    description:
	mtz2various.com is a general-purpose "smart" script for converting
	"F" data from an mtz file (such as ${finalMTZ}) to other file formats
	for other non-CCP4 programs.  The format of the output file can either
	be implied by using a standard file extension in the output file name,
	or declared explicitly and separately on the command line.  Free-R 
	flags are exported automatically, if they are present.  In the case of 
	XtalView files, a suitable CRYSTAL file is also generated.

################################################################################
autoscala		- optimizer for SDCORR card
    reads: a scala script
    makes: a better scala script
    usage: ${SCRIPT_dir}/autoscala script.com
    where: script.com is the \scala script to optimize

    example: ${SCRIPT_dir}/autoscala ${SCRIPT_dir}/merge.com

    Scala's SDCORRECTION card allows the assigned error (sigma) of the spot 
    intensities to be edited.  Most measurement programs cannot predict the
    effects of absorption and other systematic measurement errors, and therefore
    usually give unrealistically low estimates of the error in the measured
    spot intensities.  You should read the scala documentation to find out 
    exactly how SDCORR works.
    Briefly, "correct" sigmas should be similar to the scatter of observed intensities.
    That is, if the 10 observations of hkl=(5,9,12) deviate from the average value
    of (5,9,12) by 100 units (rms), then the sigma of (5,9,12) should be 100.  So, 
    if the assigned sigma is 50, then the scatter/sigma will be 2.  This analysis, 
    grouped by intensity bins, is the last graph in the scala logfile.  You want 
    all the points on this graph to be as close to 1.0 as possible.  If you see this, 
    then your assigned sigmas are probably realistic.
    To save you from hours of diddling with the SDCORR numbers, autoscala uses a 
    "Golden-Section" search (derived from Numerical Recipes), to optimize the three 
    numbers for scala's SDCORRECTION card, using the deviation of the aforementioned
    graph from 1.0 as a target.  In CCP4 3.3 and beyond, the first number on the SDCORR 
    card is optimized internally (and might as well be "1"), but the remaining two can 
    be tuned up by autoscala.

################################################################################
For more detailed information, see your CCP4 documentation in:
${CCP4}/doc

or go to the CCP4 homepage at:
netscape http://www.dl.ac.uk/CCP/CCP4/main.html

EOF-README


goto Return_from_Generate























































# guard: stop here so that control never falls through into the section
# below; with this exit in place the label can only be reached by a goto
exit
Setup_SOLVE:
###############################################################################

  ####    ####   #       #    #  ######
 #       #    #  #       #    #  #
  ####   #    #  #       #    #  #####
      #  #    #  #       #    #  #
 #    #  #    #  #        #  #   #
  ####    ####   ######    ##    ######
     
###############################################################################
#
#   Create a SOLVE input file
#
#   This section fills in defaults, tries to locate the solve and resolve
#   programs and the solve access file, looks up scattering factors with
#   crossec, and then writes a solve.com script into SOLVE_dir.
#
###############################################################################

# safeties/defaults: give every name this section reads a value,
# leaving anything that is already defined untouched
if(! $?CELL) then
    set CELL = ( 100 100 100 90 90 90 )
endif
if(! $?SG) then
    set SG = "P1"
endif
# otherSGs falls back to the (possibly just defaulted) SG
if(! $?otherSGs) then
    set otherSGs = "$SG"
endif
if(! $?hiRES) then
    set hiRES = 3
endif
if(! $?loRES) then
    set loRES = 1000
endif
if(! $?wavenames) then
    set wavenames = "FP"
endif
if(! $?wavelengths) then
    set wavelengths = "1.5471"
endif
if(! $?SITES) then
    set SITES = 2
endif
if(! $?ASU) then
    set ASU = 30000
endif
# NRES is always recomputed: ASU divided by 120, truncated to an integer
set NRES = `echo "$ASU" | nawk '{printf "%d", ($1/120)}'`

# output script location; an existing script is set aside as .bak
# unless FRUGAL is defined
if(! $?SOLVE_dir) then
    set SOLVE_dir = "."
endif
set SCRIPT = ${SOLVE_dir}/solve.com
if(! $?FRUGAL) then
    if(-e "$SCRIPT") mv ${SCRIPT} ${SCRIPT}.bak
endif

# starting guesses for the programs and their home directories
# (refined by the searches further down)
set SOLVE_program   = "solve"
set RESOLVE_program = "resolve"
set SOLVE_home      = "."
set RESOLVE_home    = ""

# symmetry file: lower-cased space group name plus .sym, looked for in SOLVE_home
set SOLVE_sg_file = `echo "$SG" | nawk '{print tolower($0) ".sym"}'`
set SOLVE_sg_dir  = "$SOLVE_home"

# script variables
if(! $?TITLE) then
    set TITLE = "Example SOLVE input file"
endif
if(! $?tempfile) then
    set tempfile = "./tempfile."
endif
if(! $?MAXLINE) then
    set MAXLINE = 500
endif


# need two-letter element code for SOLVE
# (an Ee that the caller has already defined is left alone)
if(! $?Ee) then
    # use "$METAL" variable from common Elves programs
    if(! $?METAL) set METAL = "Se"
    if("$METAL" == "unknown") set METAL = "Se"
    
    # convert long element name
    if(-e ${tempfile}elements.awk) then
	# need ${tempfile}elements.awk
	# field 2 of the first line it prints is taken as the element code
	set Ee = `echo $METAL | ${tempfile}elements.awk | nawk 'NR==1{print $2}'`
    else
	# direct conversion?
	# METAL is quoted so that an empty or multi-word value cannot
	# turn this test into a syntax error
	if("$METAL" =~ [A-Z][a-z]) then
	    set Ee = "$METAL"
	else
	    # default to selenium
	    set Ee = "Se"
	endif
    endif

    # the lookup above prints nothing for a name it does not know;
    # fall back to selenium rather than carrying an empty code forward
    if("$Ee" == "") set Ee = "Se"
endif

# fill-in missing wavelength values (not too critical):
# append 1.00000 until there is one wavelength per wavename
while($#wavelengths < $#wavenames)
    set wavelengths = ( $wavelengths 1.00000 )
end


# create a temporary file of (potentially) unsupported symops
# Four alternate-axis orthorhombic settings are written (P2122, P2212,
# P21221, P22121), each as one header line followed by four operators.
# NOTE(review): the header layout looks like the CCP4 symop.lib format - confirm
cat << EOF-screwies >! ${tempfile}screwies
17 4 4 P2122 PG222 ORTHORHOMBIC
 X,Y,Z 
 -X,Y,-Z 
 1/2+X,-Y,-Z 
 1/2-X,-Y,Z
17 4 4 P2212 PG222 ORTHORHOMBIC
  X,Y,Z 
  X,1/2-Y,-Z 
 -X,1/2+Y,-Z 
 -X,-Y,Z
18 4 4 P21221 PG222 ORTHORHOMBIC
 X,Y,Z 
 -X,Y,-Z 
 1/2+X,-Y,1/2-Z 
 1/2-X,-Y,1/2+Z
18 4 4 P22121 PG222 ORTHORHOMBIC
 X,Y,Z 
  X,-Y,-Z 
 -X,1/2+Y,1/2-Z 
 -X,1/2-Y,1/2+Z
EOF-screwies


#######################################

# check environment for SOLVE's location
# When SOLVEDIR is defined it overrides the starting guesses: both programs
# and the symmetry-file directory are pointed at that directory.
if($?SOLVEDIR) then
    # solve must already be set up!
    set SOLVE_program = ${SOLVEDIR}/solve
    set RESOLVE_program = ${SOLVEDIR}/resolve
    set SOLVE_home = ${SOLVEDIR}
    set SOLVE_sg_dir = "${SOLVE_home}"
endif

# from here on an interrupt jumps to Skip_Solve_Search instead of
# terminating the script, so the searches below can be cut short
onintr Skip_Solve_Search

# look/check for solve access file in a few likely places
# The whole search is skipped when any of the four standard /usr/local/lib
# locations already holds solve.access or solve2.access.  Otherwise
# SOLVE_home (and SOLVE_sg_dir) are moved to the first directory found to
# contain an access file, and SOLVEDIR is exported with the result.
if((! -e /usr/local/lib/solve.access)&&(! -e /usr/local/lib/solve/solve.access)&&(! -e /usr/local/lib/solve2.access)&&(! -e /usr/local/lib/solve/solve2.access)) then
    # default locations of access file are not available
    # need to set SOLVEDIR variable 

    test -r ${SOLVE_home}/solve*.access >& /dev/null
    if($status) then
	# solve won't run without this file, look for it
	# candidates: every executable word printed by which solve
	set files = `which solve |& nawk 'BEGIN{RS=" "} ! system("test -x " $0) {print}'`
	foreach word ( $files )
	    # keep looking only while SOLVE_home holds neither access file.
	    # (this used to read  ! (-e A)||(-e B)  which csh evaluates as
	    # (!A)||B, so finding solve2.access never stopped the search)
	    if((-e "$word")&&(! -e "${SOLVE_home}/solve.access")&&(! -e "${SOLVE_home}/solve2.access")) then
		# accept exactly one non-directory access file of more than 10 bytes
		set temp = `ls -lnLd $word/solve*.access |& nawk '! /^d/ && $5 > 10 {print $NF}'`
		if("$#temp" != 1) then
		    # nothing under the word itself: try its parent directory
		    set word = `dirname $word`
		    set temp = `ls -lnLd $word/solve*.access |& nawk '! /^d/ && $5 > 10 {print $NF}'`
		endif
		if("$#temp" == 1) then
		    set SOLVE_home = "$word"
		    set SOLVE_sg_dir = "$SOLVE_home"
		endif
	    endif
	end
    endif
    test -r ${SOLVE_home}/solve.access
    if($status) then    
	# get serious, if we still havn't found it
#       foreach place ( /programs/solve /programs /usr/local /home /usr /* / )
	foreach place ( /usr/local /programs/solve /programs /home /usr/bin /usr/share )
	    # same guard as above: search only while neither access file is in SOLVE_home
	    if((-e "$place")&&(! -e "${SOLVE_home}/solve.access")&&(! -e "${SOLVE_home}/solve2.access")) then
		# FIND files of right name
		# (place is made absolute first; MAXLINE caps the total length of the hit list)
		set place = `cd $place ; pwd`
		set temp = `find $place -name 'solve*.access' -print |& nawk 'NF==1' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
		if ("$temp" == "") continue

		# get the most recent one
		set temp = `ls -1rt ${temp} | tail -1`
		
		if(-e "$temp") then
		    set SOLVE_home = `dirname $temp`
		    set SOLVE_sg_dir = "$SOLVE_home"
		    break
		endif
	    endif
	end	
    endif
    setenv SOLVEDIR $SOLVE_home
endif


# look/check for the SOLVE executable
# Each stage below runs only if SOLVE_program is still not executable
# after the previous one.
# stage 1: SOLVE_program exactly as set so far
test -x "$SOLVE_program"
if($status) then
    # try this
    # stage 2: a file of the same name directly inside SOLVE_home
    set SOLVE_program = "$SOLVE_home/"`basename "$SOLVE_program"`
endif

test -x "$SOLVE_program"
if($status) then
    # solve 1.18-style install
    # stage 3: the bin directory that sits beside SOLVE_home
    # (dirname of SOLVE_home with a trailing slash yields its parent)
    set SOLVE_program = `dirname $SOLVE_home/`/bin/`basename "$SOLVE_program"`
endif

test -x "$SOLVE_program"
if($status) then
    # stage 4: every executable word printed by which solve, then SOLVE_home;
    # MAXLINE caps the total length of the word list
    set words = `which solve |& nawk 'BEGIN{RS=" "} ! system("test -x " $0) {print}' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
    foreach word ( $words $SOLVE_home )
	# re-tested on every pass so that nothing more is done once a hit was accepted
	test -x "$SOLVE_program"
	if($status) then
	    # see if it's the executable
	    # accepted only when ls reports a non-directory larger than 1000000 bytes
	    set temp = `ls -lnLd $word |& nawk '! /^d/ && $5 > 1000000 {print $NF}'`
	    if("$#temp" == 1) then
		set SOLVE_program = "$word"
		# prefer the directory of the executable for symmetry files
		# when the wanted .sym file is actually there
		set temp = `dirname $SOLVE_program`
		if(-e "${temp}/${SOLVE_sg_file}") then
		    set SOLVE_sg_dir = "$temp"
		endif
		continue
	    endif
	    if("$#temp" != 1) then
		# maybe this was a directory in the path? 
		# NOTE(review): word and temp are assigned here but nothing in
		# this loop reads them before they are overwritten, so this
		# branch appears to have no effect - confirm intent
		set word = `dirname $word`
		set temp = `ls -ln $word/solve |& nawk '! /^d/ && $5 > 10 {print $NF}'`
	    endif
	endif
    end
endif


# get serious, if we still havn't found it
# Last resort: run find for a file named solve under a fixed list of
# directories, stopping the real work as soon as one executable is accepted.
test -x "$SOLVE_program"
if($status) then
#    foreach place ( /programs/solve /programs /usr/local /usr/* /* / )
    foreach place ( /programs/solve /programs /usr/local /xtal/ /usr/bin /usr/share )
	# skip places that do not exist, and everything after a hit
	test -x "$SOLVE_program"
	if(($status)&&(-e "$place")) then
	    # FIND files of right name
	    # (single-word lines only; MAXLINE caps the total length of the hit list)
	    set temp = `find $place -name solve -print |& nawk 'NF==1' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
	    # get the most recent one
	    # (only entries that ls shows as regular files are considered)
	    set temp = `ls -lnLrtd ${temp} | nawk '/^-/{print $NF}' | tail -1`

	    test -x "$temp"
	    if(! $status) then
		set SOLVE_program = "$temp"
		# use the directory of the executable for symmetry files
		# when the wanted .sym file is there
		set temp = `dirname "$temp"`
		if(-e "${temp}/${SOLVE_sg_file}") then
		    set SOLVE_sg_dir = "$temp"
		endif
	    endif
	endif
    end	
endif

# look/check for the RESOLVE executable
# Two attempts only: the current value, then a file of the same name
# inside SOLVE_home.
test -x "$RESOLVE_program"
if($status) then
    # try this
    set RESOLVE_program = "$SOLVE_home/"`basename "$RESOLVE_program"`
endif

test -x "$RESOLVE_program"
if($status) then
    # hope it's in the path
    set RESOLVE_program = resolve
endif

# normal completion of the search: jump over the interrupt handler
goto Make_Solve_SGs

# interrupt handler installed by the onintr above the search
Skip_Solve_Search:
# restore default interrupt handling
onintr
echo "Skipping SOLVE executable search."

Make_Solve_SGs:
# restore default interrupt handling (also needed on the goto path)
onintr
# symmetry file: if it is not in the directory chosen so far, go back to SOLVE_home
if(! -e "${SOLVE_sg_dir}/${SOLVE_sg_file}") set SOLVE_sg_dir = "${SOLVE_home}"

# the file must also contain the identity operator line;
# when it does not, point at the per-space-group subdirectory instead
#set SOLVE_sg_dir = ${SOLVE_dir}/${SG}/${SOLVE_sg_file}
grep "X, Y, Z" ${SOLVE_sg_dir}/${SOLVE_sg_file} >& /dev/null
if($status) set SOLVE_sg_dir = ./${SG}

# trial run: feed END to SOLVE and look for the author name in its output;
# on failure warn the user and fall back to a conventional install path
echo "END" |& "$SOLVE_program" |& grep "Terwilliger" >& /dev/null
if($status) then
    echo "WARNING: we can't run "\"solve\"" on this system."
    echo "find out where "\"solve\"" is, and edit ${SCRIPT}"
    set SOLVE_home = "/programs/solve"
endif

# remove the status file that the trial run may have left behind
rm -f solve.status >& /dev/null

# RESOLVE shares the SOLVE home unless its own home exists
if(! -e "$RESOLVE_home") then
    set RESOLVE_home = "$SOLVE_home"
endif

# ask the CCP4 crossec program for the scattering factors of the
# element at every wavelength; its log goes to a scratch file
crossec << EOF >! ${tempfile}crossec.log
ATOM $Ee
NWAVE $#wavelengths $wavelengths
END
EOF

# pull out the log lines whose first field is the upper-cased element code
# and keep fields 2-4, sorted numerically in descending order
nawk -v Ee=$Ee '$1 == toupper(Ee) {print $2, $3, $4}' ${tempfile}crossec.log |\
sort -nr >! ${tempfile}wl_fpfpp
# format: lambda f' f" 
rm -f ${tempfile}crossec.log >& /dev/null

# make sure f" maximum is really the maximum possible value of f"
# Needs both the elements.awk helper and METAL; METAL is tested explicitly
# because it is only defaulted when Ee was not supplied by the caller, and
# expanding an undefined variable would abort the script.
if((-e ${tempfile}elements.awk)&&($?METAL)) then
    # avoid making peak f" wavelength the minimum f" becasue
    # of slight undercutting of the metal's theoretical edge
    #
    # pipeline: an EDGE line (edge wavelength and its f") is put in front of
    # the table; every table row gets two extra columns (distance from the
    # edge, edge f"); rows are ordered by that distance (column 4) and only
    # the closest row may have its f" raised; the result is re-sorted the
    # same way as the original table.
    # The key is given as -k 4 since the older +3 form is no longer
    # accepted by current sort implementations.
    echo $METAL | ${tempfile}elements.awk |\
    nawk '$NF=="edge"{print "EDGE", 12398.4245/$4, $6}' |\
    cat - ${tempfile}wl_fpfpp |\
    nawk '/EDGE/{edge=$2;fpp=$3}\
        ! /EDGE/{print $0, sqrt(($1-edge)^2), fpp}' |\
    sort -n -k 4 |\
    nawk '# only increase fpp if close to edge \
	  NR==1 && $4<0.001 && $3<$NF{$3=$NF}\
          {print $1, $2, $3}' |\
    sort -nr >! ${tempfile}
    # adopt the adjusted table only if it still has one row per wavelength
    set temp = `cat ${tempfile} | wc -l`
    if($temp == $#wavelengths) then
	mv ${tempfile} ${tempfile}wl_fpfpp >& /dev/null
    endif
    rm -f ${tempfile} >& /dev/null
endif


# generate wavelength entries in a temporary file
# One stanza per wavename is appended to ${tempfile}solvewaves; the file
# is started with a single empty line.
set i = 0
echo "" >! ${tempfile}solvewaves
foreach wave ( $wavenames )
    @ i = ( $i + 1 )
    
    # make sure it is valid
    # Row i of the scattering-factor table is paired with wavename i.
    # NOTE(review): the table was sorted numerically in descending order, so
    # this pairing presumes the wavelengths are listed in that order - confirm.
    # NOTE(review): when the table has fewer than i rows, head/tail return
    # its last row rather than nothing - confirm this is acceptable.
    set wl_fpfpp = `head -$i ${tempfile}wl_fpfpp | tail -1`
    if($#wl_fpfpp != 3) then
	# bad metal, or wavelength
	# placeholders make the missing values obvious in the generated script
	set wl_fpfpp = `echo "$wavelengths[$i] -?.???   ?.???"`
    endif
    rm -f ${tempfile}
    
    # get energy for printing's sake
    # (left empty when the wavelength is not a positive number)
    set energy = `echo "$wavelengths[$i]" | nawk '$1+0>0{printf "%.1f", 12398.4245/$1}'`

    # everything between the next line and EOF-wave is written verbatim
    # (after variable substitution) into the stanza file
    cat << EOF-wave >> ${tempfile}solvewaves
lambda $i			! info on wavelength #$i follows
label $wave			! a label for this wavelength
rawmadfile ${wave}.unmerged.fmt	! datafile with h k l I+ sigma+ I- sigma-
wavelength $wl_fpfpp[1]		! wavelength value ($energy eV)
fprimv_mad $wl_fpfpp[2]		! f' value of $Ee at $energy eV   (IMPORTANT!)
fprprv_mad $wl_fpfpp[3]		! f" value of $Ee at $energy eV   (IMPORTANT!)

EOF-wave
end
# the table is no longer needed once every stanza has been written
rm -f ${tempfile}wl_fpfpp
unset wl_fpfpp

# generate space-group definition list
#set SOLVE_sg_file = `echo "$SOLVE_sg_dir $SG" | nawk '{print "symfile", $1 "/" tolower($2) ".sym"}'`

# almost always a bad idea to go to full resolution
# select half the reciprocal-space volume
# (solveRES = hiRES divided by the cube root of 0.5, printed to two decimals)
set solveRES = `echo $hiRES 0.5 | nawk 'NF==2{printf "%.2f", ($2*($1^-3))^(-1/3)}'`

###############################################################################
# now create a SOLVE input file
# First half of the generated script.  Plain variables in the text below are
# substituted now, while generating; the ones written with a backslash in
# front of the dollar sign are left for the generated script to expand.
# This half stops in the middle of the SOLVE input so that the
# per-wavelength stanzas can be appended after it.
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv $SCRIPT ${SCRIPT}.bak
cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#  Prototype SOLVE script from $TITLE
#
############################
#
#  You can run SOLVE interactively by typing:
#   setenv SOLVEDIR $SOLVE_home
#   ${SOLVE_program}
#
#   ... and then feeding it lines starting with "title" below
#
#  The f' and f'' values here were taken from the CCP4 crossec program
#  The SOLVE homepage/manual is at:
#  netscape http://www.solve.lanl.gov/
#
############################
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk

setenv SOLVEDIR $SOLVE_home

echo "Output will be put in solve.log and mad.log"
echo "watch solve.status for the sites."
echo ""
echo "enter Cntrl-Z and bg to background this SOLVE job."
echo ""
echo ""

$SOLVE_program << EOF-solve >! solve.log
title $TITLE

CELL $CELL
symfile ${SOLVE_sg_dir}/$SOLVE_sg_file

resolution $hiRES $loRES
res_phase $solveRES     ! sometimes works better without noisy hi-res data

logfile mad.log                 ! write out most information to this file.
				! summary info will be written to "solve.prt"

mad_atomname $Ee               ! anomalously scattering atom

!fixscattfactors                 ! do not refine scattering factors
refscattfactors                 ! refine scattering factors
thorough

EOF-script
# append the per-wavelength stanzas to the still-open SOLVE input
cat ${tempfile}solvewaves >> $SCRIPT
# with a single wavelength the generated input gets a SAD line and the
# three MAD commands are prefixed with an exclamation mark
set SAD = ""
set notmad = ""
if ($#wavelengths == 1) then
    set SAD = SAD
    set notmad = "! "
endif
# Second half of the generated script: the rest of the SOLVE input, the
# end marker of its input, and the csh code that reports success or failure.
cat << EOF-script >> $SCRIPT

!premerged
unmerged
readformatted
nres $NRES			! approx # of residues in ASU         (IMPORTANT!)
nanomalous $SITES			! approx # of anomalously scattering atoms in ASU (IMPORTANT!)

$SAD
${notmad}SCALE_MAD                       ! read in and localscale the data
${notmad}ANALYZE_MAD                     ! run MADMRG and MADBST and analyze all the Pattersons
${notmad}SOLVE                           ! Solve the structure

EOF-solve
############################
if(\$status) goto bad

grep "TOP SOLUTION FOUND BY SOLVE" solve.status >& /dev/null
if(\$status) goto bad

touch solve.log
echo "A message from the Elves: "                                         >> solve.log
nawk '/STATUS:   DONE  /,/EOF/' solve.status                              >> solve.log
echo "You might get better results with more accurate f' and f'' in \$0."  >> solve.log
echo "the refined values used this time were:"                            >> solve.log
nawk '/NEWATOMTYPE/{print \$NF} \
      /FPRIMV/{print "fprimv_mad", \$NF} \
      /FPRPRV/{print "fprprv_mad ", \$NF}' solve*.script                  >> solve.log
cat << EOF                                                                >> solve.log
a phased MTZ file using this solution should be available in solve.mtz
but, solvent-flattening is highly recommended before you look at maps.
Elves also suggest you put the above sites into SHARP, since it is a
very good phasing program.
EOF

# display message, for those who are listening
tail -500 solve.log | nawk '/A message from the Elves/,/\n/'

exit

bad:
echo "Dang!  SOLVE failed. "
echo "You might need to edit \$0 "
echo "to see what went wrong type:  more solve.log"
exit 9

EOF-script
# make the finished script runnable by everyone
chmod a+x $SCRIPT


cat << EOF-resolve >! ${SOLVE_dir}/resolve.com
#! /bin/csh -f
#
#	Scaler Elves: Automatic RESOLVE Solvent flattening script
#
#
####################################################################
set mtzfile  = "solve.mtz"
set pdbfile  = "mr_solution.pdb"
set outfile  = "resolved.mtz"
set tempfile = "./resolve_temp"

defaults:
set steps   = "auto"
set Solvent = "50%"

# these are all reset from the command line
set F       = "FP"
set SIGF    = "SIGFP"
set PHI     = PHIB
set FOM     = FOM
set HL      = "HLA=HLA HLB=HLB HLC=HLC HLD=HLD"

setenv SYMOP \${CLIBD}/symop.lib
setenv CCP4_OPEN UNKNOWN

# different systems have different nawks
alias nawk $nawk
nawk 'BEGIN{print}' >& /dev/null
if(\$status) alias nawk awk

if("\$1" == "") goto Help
goto Setup
# scan the command line
Help:
cat << EOF

usage: \$0 [mlphare.mtz] [50%] [Flo]

where:
    mlphare.mtz - output MTZ from MLPHARE
    50%		- solvent content (must end with %)
    Flo		- F you want to flatten with (default to best F in mtz)

EOF
exit 9
ReturnFromSetup:
if(("\$F" == "")||(! -e \$mtzfile)) goto Help

# make solvent content fractional
set Solvent = \`echo \$Solvent | nawk '{print \$1/100}' \`

####################################################################
setenv SOLVEDIR $SOLVE_home
$RESOLVE_program << eof-resolve
HKLIN \$mtzfile \\
HKLOUT \$outfile \\
!TITLE phase resolution in resolve by \$Solvent solvent
!
! number of flattening cycles to execute
!MASK_CYCLE \$steps
! change last two values for buffer between solvent and protein
!SOLC 0.628 MASK 0.60 0.37
SOLVENT_CONTENT \$Solvent
!
! use a pdb file as the starting phases
!NOPRIOR
!MODEL \$pdbfile
\$model
LABIN FP=\$F PHIB=\$PHI FOM=\$FOM \$HL
!LABOU FP=FP PHIM=PHIM FOMM=FOMM
!
eof-resolve
if(! \$status) then
    echo "\$outfile is ready."
else
    exit 2
endif
exit
####################################################################


Setup:
####################################################################
# scan the command line
foreach arg ( \$* )
    if( "\$arg" =~ *.mtz ) set mtzfile  = "\$arg"
    if( "\$arg" =~ *.pdb ) set pdbfile  = "\$arg"
    if( "\$arg" =~ *.brk ) set pdbfile  = "\$arg"
    if( "\$arg" =~ *% ) set Solvent     = "\$arg"
    if(( "\$arg" =~ *[0-9] )&&( "\$arg" =~ [1-9]* )) set steps = "\$arg"
end

#let DM choose scheme for known # of steps
#if("\$steps" != "auto") set SCHEME = AUTO

#get variables from mtz file
echo "go" | mtzdump hklin \$mtzfile |\\
nawk '/OVERALL FILE STATISTICS/,/No. of reflections used/' |\\
nawk 'NF>10 && \$(NF-1) ~ /[FQPWADI]/' |\\
cat >! \${tempfile}mtzdmp

# use completeness, or F/sigF to pick default F
cat \${tempfile}mtzdmp |\\
nawk '\$(NF-1) == "F"{F=\$NF; meanF=\$8; reso=\$(NF-2); comp=substr(\$0,32)+0; \\
      getline; if(\$(NF-1)!="Q") next; S=\$NF; if(\$8) meanF /= \$8; \\
	print F, S, reso, comp, meanF;}' |\\
sort +2n -3 +3nr -4 +4nr >! \${tempfile}F

# and extract all dataset types/labels
cat \${tempfile}mtzdmp |\\
nawk 'NF>2{print \$(NF-1), \$NF, " "}' |\\
cat >! \${tempfile}cards

#clean up
rm -f \${tempfile}mtzdmp

# pick F with best resolution, or F/sigma
set F    = \`head -1 \${tempfile}F\`
if(\$#F > 2) then
    set SIGF = \$F[2]
    set F    = \$F[1]
endif

# pick most recent phase/FOM
grep "P \$PHI " \${tempfile}cards >& /dev/null
if(\$status) then
    set temp = \`nawk '/^P/{print \$2}' \${tempfile}cards  | tail -1\`
    if("\$temp" != "") set PHI = "\$temp"
endif
grep "W \$FOM " \${tempfile}cards >& /dev/null
if(\$status) then
    set temp = \`nawk '/^W/{print \$2}' \${tempfile}cards | tail -1\`
    if("\$temp" != "") set FOM = "\$temp"
endif

# pick most recent HL coefficients
cat \${tempfile}cards |\\
nawk '\$1=="A"{++n;HL[n]=\$NF} END{for(i=1;i<=n;i+=4) \\
      print "HLA="HL[i],"HLB="HL[i+1],"HLC="HL[i+2],"HLD="HL[i+3]}' |\\
cat >! \${tempfile}HL
set HL = \`tail -1 \${tempfile}HL\`

# see if user specified an F, Phase, FOM, or HL set
foreach arg ( \$* )
    set temp = \`grep " \$arg " \${tempfile}cards\`
    if("\$temp" =~ F*) then
	set F = "\${arg}"
	set temp = \`nawk -v arg="\$arg" '\$1==arg{print \$2}' \${tempfile}F\`
	if(\$#temp == 1) set SIGF = "\$temp"
	continue
    endif
    if("\$temp" =~ P*) set PHI = "\${arg}"
    if("\$temp" =~ W*) set FOM = "\${arg}"
    if("\$temp" =~ A*) set HL = \`grep "=\${arg} " \${tempfile}HL | tail -1\`
end

if(-e "\$pdbfile") then
    set model = "pdb_in \$pdbfile"
else
    set model = "!"
endif

rm -f \${tempfile}cards \${tempfile}F \${tempfile}HL >& /dev/null

goto ReturnFromSetup
EOF-resolve
chmod a+x ${SOLVE_dir}/resolve.com

# now create a SOLVE input file for each possible space group
#
# For every entry of $otherSGs (upper-cased) this loop:
#   - makes sure ${SOLVE_dir}/<SG> exists and is a directory
#   - writes ${SOLVE_dir}/<SG>/solve.com: the script named by $SCRIPT on entry,
#     with its "symfile" line pointed at ./<sg>.sym and its "rawmadfile"
#     line prefixed with ../
#   - copies resolve.com next to it
#   - installs <sg>.sym, either from ${SOLVE_sg_dir} or rebuilt from CCP4's
#     symop.lib plus the ${tempfile}screwies table
# Note that $SCRIPT is left pointing at the last solve.com written.
set ORIGINAL = "$SCRIPT"
foreach sg ( `echo $otherSGs | nawk '{print toupper($0)}'` )
    
    # make sure directory exists
    if(-e "${SOLVE_dir}/${sg}") then
	test -d "${SOLVE_dir}/${sg}"
	if($status) then
	    # a non-directory is occupying the name: move the offending file
	    # out of the way and then create the directory the rest of this
	    # loop writes into
	    mv ${SOLVE_dir}/${sg} ${SOLVE_dir}/${sg}.bak
	    mkdir ${SOLVE_dir}/${sg}
	endif
    else
	mkdir ${SOLVE_dir}/${sg}
    endif
    
    # create a different script for each space group
    set SCRIPT = ${SOLVE_dir}/${sg}/solve.com
    if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak
    
    # edit the existing SOLVE script
    cat "$ORIGINAL" |\
    nawk -v SG=$sg '! /^symfile/ && ! /^rawmadfile/ {print} \
	/^rawmadfile/ {print "rawmadfile ../" substr($0, 12)}  \
	/^symfile/    {print "symfile",  "./" tolower(SG) ".sym"}' |\
    cat >! $SCRIPT
    
    chmod a+x $SCRIPT

    # give em a copy of the resolve script
    cp ${SOLVE_dir}/resolve.com ${SOLVE_dir}/${sg}


    # copy each space group file
    set symfile = `echo $sg | nawk '{print tolower($0) ".sym"}'`
    
    # check if solve directory already has one
    grep "X, Y, Z" ${SOLVE_sg_dir}/$symfile >& /dev/null
    if(! $status) then
	# use solve's sym file
	cp ${SOLVE_sg_dir}/$symfile ${SOLVE_dir}/${sg}/$symfile >& /dev/null
    else
	# try to create symop files from CCP4
	# ($temp stays empty when CLIBD is unset or has no symop.lib)
	set temp = ""
	if($?CLIBD) then
	    if(-e "$CLIBD/symop.lib") then
		set temp = "$CLIBD/symop.lib"
	    endif
	endif
	
	# convert sym file from CCP4
	# Read the guarded $temp rather than $CLIBD directly, so an unset CLIBD
	# does not abort the script; the screwies table is still consulted.
	# First awk: on the header line whose 4th field is the space group,
	# print a SOLVE-style title and then pass through the next $2 lines.
	# Second awk: re-space the comma-separated operators as "X, Y, Z ".
	cat $temp ${tempfile}screwies |\
	nawk -v SG=$sg 'ops{print $1;--ops} \
	$4 == SG && $1 < 999 {printf "%3d Equiv positions, %-10s SG # %3d\n", $2, $4, $1; ops=$2}' |\
	nawk 'BEGIN{FS=","} NR>1{print $1 ", " $2 ", " $3 " "} NR==1' |\
	cat >! ${SOLVE_dir}/${sg}/$symfile
    endif
    
    # warning in case the unthinkable happens
    grep "X, Y, Z" ${SOLVE_dir}/${sg}/$symfile >& /dev/null
    if($status) then
	echo "WARNING: unable to determine symmetry operators for $sg ! "
    endif
end
unset ORIGINAL
unset energy

# scratch files are no longer needed
rm -f ${tempfile}solvewaves
rm -f ${tempfile}symfiles
rm -f ${tempfile}screwies













##############################################################
# create SOLVE hkl converter script
#
# Writes ${SOLVE_dir}/mtz2SOLVE.com from the here-document below and makes
# it executable.  An existing copy is first renamed to *.bak unless $FRUGAL
# is set.
#
# Inside the here-document, unescaped variables ($nawk, ${tempfile}, $SCRIPT)
# are substituted now, while generating; \$-escaped ones are left for the
# generated script to evaluate when it is run.
#
# The generated script:
#   - takes the first existing *.mtz argument as its input (usage + exit 9
#     if there is none)
#   - reads the mtzdump header and collects the label (and the following
#     label) of every column whose type matches J, G or K
#   - with more than 3 such words: treats the file as merged and dumps
#     I(+) SIGI(+) I(-) SIGI(-) through mtzutils + mtzdump
#   - otherwise: treats it as unmerged and writes the first label pair
#     with mtz2various OUTPUT USER
#   - output goes to SOLVE.fmt
#
# NOTE(review): the generated script runs under "csh -fe" while its siblings
# use "-f"; with -e a failing nawk probe presumably ends the script before
# the awk fallback can be selected -- confirm.
##############################################################
set SCRIPT = ${SOLVE_dir}/mtz2SOLVE.com
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

cat << EOF-script >! $SCRIPT
#! /bin/csh -fe
#
#  Conversion script to output I+ and I- in SOLVE format
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set infile   = ""
set outfile  = "SOLVE.fmt"
set tempfile = "${tempfile}mtz2solve"
#
goto Setup
#
# usage: $SCRIPT blah.mtz
#
#   if blah.mtz is a merged mtz, then H, K, L, I(+), SIGI(+), I(-), SIGI(-) 
#	will be written to \$outfile
#   if blah.mtz is an unmerged mtz, then  H, K, L, I, SIGI
#	will be written to \$outfile
#
ReturnFromSetup:

##############################################################
if("\$type" == "un") then
# this is the mtz2various format to use, you need 
mtz2various hklin \$infile hklout \$outfile << EOF-solve
OUTPUT USER '(\$format)'
LABIN \$LABIN
END
EOF-solve
else

# mtz2various doesn't work for this, use mtzutils/mtzdump instead
mtzutils hklin1 \$infile hklout \${tempfile}dumpme.mtz << EOF-label
INCLUDE \$LABELS
EOF-label

mtzdump hklin \${tempfile}dumpme.mtz << EOF-dump |\\
  nawk '/LIST OF REFLECTIONS/,/MTZDUMP/' | nawk 'NF>3 && ! /[A-Z]/' >! \$outfile
FORMAT '(\$format)'
NREF -1
EOF-dump

endif

############################
if(! \$status) then
    echo ""
    echo "\$outfile now contains \$I from \$infile in SOLVE format."
    echo "use readformatted and \${type}merged in SOLVE"
    echo ""
endif

exit

Setup:
foreach arg ( \$* )
    if((! -e "\$infile")&&(-e "\$arg")) then
	if(\$arg =~ *.mtz) set infile = \$arg
    endif
end

if(! -e "\$infile") then
    echo "usage: \$0 blah.mtz "
    echo "where: blah.mtz is the mtz file you want to convert to SOLVE format."
    exit 9
endif

# see if this is a merged, or unmerged file
echo "HEADER" | mtzdump hklin \$infile |\
nawk '/^ H K L /{while(NF>1){printf "%s", \$0; getline}; print ""} \
/^ H H H /{while(NF>1){printf "%s", \$0; getline}; print ""}' |\
nawk '/^ H K L /{for(i=1;i<=NF;++i){++n;label[n]=\$i}} \
      /^ H H H /{for(i=1;i<=NF;++i){if(\$i ~ /[JGK]/) print label[i], label[i+1]}}' |\
cat >! \${tempfile}labels

set I = \`cat \${tempfile}labels\`
if(\$#I > 3) then
    # this was a merged file
    set type = "pre"
    set I = "I(+) SIGI(+) I(-) SIGI(-)"
    set LABELS = "\$I"
    set format = "3I5, 4F12.3"
    # Resolve version incompatibility here
    set temp = \`echo "" | mtz2various | & nawk '/VERSION/{print \$7*10}'\`
    if("\$temp" < 34) then
	set LABIN = "DUM1=I(+) DUM2=SIGI(+) DUM3=I(-) DUM4=SIGI(-)"
    else
	# make sure we get these labels right
	set LABIN = "I(+)=I(+) SIGI(+)=SIGI(+) I(-)=I(-) SIGI(-)=SIGI(-)"
    endif
else
    # unmerged file
    set type = "un"
    set format = "3I5, 2F12.3"
    set LABIN = "FP=\$I[1] SIGFP=\$I[2]"
    set LABELS = "\$I[1] \$I[2]"
endif
rm -f \${tempfile}labels

goto ReturnFromSetup

EOF-script
chmod a+x $SCRIPT



#
# Make a README file
#
# Writes ${SOLVE_dir}/README.  The here-document is expanded while it is
# written: `pwd`, ${SOLVE_dir}, $otherSGs, ${SG} and $0 are replaced by their
# current values.  The loop variable in the example "foreach" loop is
# escaped (\$SG) so that the README shows a literal $SG; unescaped it would
# be replaced by the current space group and the printed loop would change
# into the same directory on every pass.
set SCRIPT = "${SOLVE_dir}/README"
cat << EOF-README >! $SCRIPT

So, how do I run SOLVE?

1)  Make sure you have *.unmerged.fmt in this directory

    If you don't, type this:
    cd `pwd`
    make SOLVE

2)  Then type this:
    cd `pwd`/${SOLVE_dir}
    solve.com

That's it.

------------------
For your convenience, $otherSGs have been set up to
run SOLVE in other possible space groups for your crystal.
To try one of these space groups, say ${SG}, do this:

cd ${SG}
./solve.com

To run solve in all these space groups, do this:

cd `pwd`/${SOLVE_dir}
foreach SG ( $otherSGs )
cd \$SG
./solve.com
cd ..
end

What if it doesn't work?
1)  Try reducing your outer resolution limit
2)  Try getting better values for f' and f'' from your EXAFS
3)  Try commenting out the "refscattfactors" (could be unstable)
4)  Try changeing the "nanomalous" (expected number of sites)
5)  Try eliminating radiation damaged-spots (re-run $0 with less frames)

For more detailed information, go to the SOLVE homepage at:
netscape http://www.solve.lanl.gov/

EOF-README


# flag that the SOLVE setup has been done, then hand control back
set SOLVE_SET_UP

goto Return_from_Setup_SOLVE


























exit
Setup_SHELX:
###############################################################################

  ####   #    #  ######  #       #    #
 #       #    #  #       #        #  #
  ####   ######  #####   #         ##
      #  #    #  #       #         ##
 #    #  #    #  #       #        #  #
  ####   #    #  ######  ######  #    #
     
###############################################################################
#
#   Create SHELX input files
#
###############################################################################
#
# Step 1: ask "which" for shelxs candidates on the path
set possibilities = `which shelxs |& nawk 'NF==1 && ! /[\052]/' |& nawk -v L=$MAXLINE '{l+=length($0)} l<L{print}'`
foreach file ( $possibilities )
    # anything that is not executable cannot be the program
    test -x "$file"
    if($status) continue

    # run the candidate on a file that cannot exist ($$) and look for the
    # program's standard "CANNOT OPEN FILE ... <pid>.ins" complaint
    set temp = `$file $$ |& nawk '/CANNOT OPEN FILE/{print $NF}' | grep "$$.ins" | tail -1`

    # the last candidate showing the signature is the one kept
    if("$temp" != "") set SHELXS = "$file"
end

# Step 2: nothing (usable) yet?  try some customary install locations
foreach place ( /programs /usr/local /usr/local/bin /* /usr/* /usr/local/* )
    # versioned layout first: <place>/shel*/<something>/shelxs
    if(! $?SHELXS) set SHELXS = `ls -1rt ${place}/shel*/*/shelxs |& head -1`
    if(! -e "$SHELXS") unset SHELXS
    # then the program sitting directly in <place>
    if(! $?SHELXS) set SHELXS = `ls -1rt ${place}/shelxs |& head -1`
    if(! -e "$SHELXS") unset SHELXS
end

# Step 3: link what was found into the SHELX directory, or complain
if($?SHELXS) then
    set temp = `pwd`
    cd $SHELX_dir
    if(! -e shelxs) ln -sf $SHELXS shelxs
    if(! -e shelxl) ln -sf `dirname $SHELXS`/shelxl shelxl
    cd $temp
else
    echo "WARNING: could not find shelxs"
    # placeholder path, so that later text mentioning $SHELXS still reads sensibly
    set SHELXS = "/somewhere/bin/shelxs"
endif

# Fill in every quantity the SHELX templates need, keeping whatever the
# caller has already defined.

# crystal description
if(! $?TITLE) set TITLE = "Example SHELX input file"
if(! $?CELL) set CELL = `echo 100 100 100 90 90 90`
if(! $?SG) then
    set SG = "P1"
endif
if(! $?otherSGs) then
    set otherSGs = "$SG"
endif

# contents of the asymmetric unit
if(! $?ASU) then
    set ASU = 30000
endif
if(! $?SITES) then
    set SITES = 2
endif

# heavy atom: element symbol looked up from $METAL, with Se as last resort
if(! $?METAL) then
    set METAL = Se
endif
if(! $?Ee) set Ee
if("$Ee" == "") set Ee = `echo $METAL | ${tempfile}elements.awk | nawk 'NR==1{print $2}'`
if("$Ee" == "") set Ee = Se

# second column of the matching symop.lib header line
if(! $?ASU_per_CELL) then
    set ASU_per_CELL = `nawk -v SG=$SG '$4 == toupper(SG) {print $2}' $CLIBD/symop.lib |& head -1`
endif

# SHELX wants these per-cell
set sites = `echo "$SITES $ASU_per_CELL" | nawk '{printf "%d", $1*$2}'`
set unit = `echo "$ASU $ASU_per_CELL" | nawk '{printf "%d", sqrt(($1*$2)/7)}'`

# wavelength: last entry of $wavelengths when it is a number above 0.3
set wave = 1.000
if($?wavelengths) then
    set temp = `echo $wavelengths[$#wavelengths] | nawk '$1+0>0.3{print $1+0}'`
    if("$temp" != "") set wave = "$temp"
endif

# this is the only one that works, in my experience
# (lattice code chosen from the first letter of the space group name)
switch ("$SG")
    case [Ii]*:
	set LATT = -2
	breaksw
    case [Rr]*:
	set LATT = -3
	breaksw
    case [Ff]*:
	set LATT = -4
	breaksw
    default:
	set LATT = "-1"
	breaksw
endsw

# create a temporary file of (potentially) unsupported symops
#
# ${tempfile}screwies holds alternative-axis settings of orthorhombic space
# groups 17, 18 and 20, laid out like the symop.lib entries it is
# concatenated with further down: one header line followed by one operator
# per line.  The awk readers in this file use header field 1 as the space
# group number, fields 2 and 3 as operator counts and field 4 as the name.
# NOTE(review): field 2 looks like the total and field 3 the primitive
# operator count -- confirm against the CCP4 symop.lib description.
cat << EOF-screwies >! ${tempfile}screwies
17 4 4 P2122 PG222 ORTHORHOMBIC
 X,Y,Z 
 -X,Y,-Z 
 1/2+X,-Y,-Z 
 1/2-X,-Y,Z
17 4 4 P2212 PG222 ORTHORHOMBIC
  X,Y,Z 
  X,1/2-Y,-Z 
 -X,1/2+Y,-Z 
 -X,-Y,Z
18 4 4 P21221 PG222 ORTHORHOMBIC
 X,Y,Z 
 -X,Y,-Z 
 1/2+X,-Y,1/2-Z 
 1/2-X,-Y,1/2+Z
18 4 4 P22121 PG222 ORTHORHOMBIC
 X,Y,Z 
  X,-Y,-Z 
 -X,1/2+Y,1/2-Z 
 -X,1/2-Y,1/2+Z
20 8 4 C2212 PG222 ORTHORHOMBIC
 X,Y,Z 
 -X,1/2+Y,-Z
 X,1/2-Y,-Z
 -X,-Y,Z
 1/2+X,Y,1/2+Z
 1/2-X,1/2+Y,1/2-Z
 1/2+X,1/2-Y,1/2-Z
 1/2-X,-Y,1/2+Z
20 8 4 C2122 PG222 ORTHORHOMBIC
 X,Y,Z 
 1/2+X,-Y,-Z
 1/2-X,-Y,Z
 -X,Y,-Z
 X,1/2+Y,1/2+Z
 1/2+X,1/2-Y,1/2-Z
 1/2-X,1/2-Y,1/2+Z
 -X,1/2+Y,1/2-Z
EOF-screwies


# Build ${SHELX_dir}/shelxs.ins in three pieces:
#   1. header (TITLE, CELL, ZERR, LATT) from the first here-document
#   2. SYMM lines for $SG
#   3. SFAC/UNIT and the solution directives from the second here-document
# An existing shelxs.ins is renamed to *.bak first unless $FRUGAL is set.
# All variables in both here-documents are expanded as the file is written.
set SCRIPT = "${SHELX_dir}/shelxs.ins"
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak
cat << EOF-shelin >! $SCRIPT
 !
 !  Example SHELX input file
 !
TITLE $TITLE
 ! CELL wavelength A B C a b g
CELL $wave $CELL
 ! ZERR sites/cell  SDs of cell
ZERR $sites   0.001   0.001   0.001   0.00   0.00   0.00

 ! LATT is supposed to be -1 for P, -2 for I, -3 for R, -4 for F
 ! $SG
LATT $LATT
 ! symmetry taken from $CLIBD/symop.lib
EOF-shelin

# try to use MTZ2VARIOUS to get symops
# (a one-reflection dummy file is turned into an mtz carrying $SG, and
#  mtz2various is then asked for SHELX output, from which the SYMM lines
#  are grepped)
echo "1 1 1 1 1" >! ${tempfile}.hkl
f2mtz hklin ${tempfile}.hkl hklout ${tempfile}.mtz << EOF >& /dev/null
CELL 100 100 100 90 90 90
SYMM $SG
LABOUT H K L F SIGF
CTYPO  H H H F Q
EOF
rm -f ${tempfile}.hkl >& /dev/null
mtz2various hklin ${tempfile}.mtz hklout ${tempfile}.hkl << EOF >& /dev/null
OUTPUT SHELX
LABIN FP=F SIGFP=SIGF
END
EOF
rm -f ${tempfile}.mtz >& /dev/null

# no SYMM lines found (grep status non-zero): fall back to the symop tables
grep SYMM ${tempfile}.hkl >&! ${tempfile}.symm
if($status) then
    # try to nab symops (sans identity op)
    # header field 3, minus one, is the number of operator lines copied;
    # the getline swallows the first operator line after the header
    cat $CLIBD/symop.lib ${tempfile}screwies |\
    nawk -v SG=$SG 'ops{print "SYMM", $0; --ops}\
     $4 == SG  && $1 < 999 { ops=$3-1; getline}' |\
    cat >! ${tempfile}.symm
endif
cat ${tempfile}.symm >> $SCRIPT
rm -f ${tempfile}.symm
rm -f ${tempfile}.hkl

# remainder of the input file is appended after the SYMM lines
cat << EOF-shelin >> $SCRIPT

 ! SFAC Doesn't seem all that important (atom types)
SFAC N $Ee
 ! sqrt(#atoms) and #sites in CELL
UNIT $unit $sites

 ! F/sig>1 and 2theta<180 cutoff
 ! OMIT 1 180

 ! tell SHELX to solve by direct methods
TREF
 ! tell SHELX to solve by Patterson methods
 !PATT
 ! tell SHELX to expand a partial solution
 !TEXP
 !${Ee}1  2  0.000 0.000 0.000 1 0.05

 ! tell SHELXD to solve by dual-space methods
 !PATS
 !FIND $SITES
 !MIND -1

 ! giving SHELX delta-intensity data
HKLF 4
 ! giving SHELX delta-F data
 !HKLF 3
END
EOF-shelin

# the same instruction file serves every difference data set
cp $SCRIPT ${SHELX_dir}/iso.ins
cp $SCRIPT ${SHELX_dir}/ano.ins
cp $SCRIPT ${SHELX_dir}/fh.ins
cp $SCRIPT ${SHELX_dir}/fm.ins

# derive the SHELXD input from it:
#   SFAC  -> keep only the last element on the line
#   UNIT  -> 4 * $SITES
#   TREF  -> replaced by the PATS/FIND/MIND/NTRY dual-space directives
# The SFAC and UNIT rules test the first field and end with "next": the
# rewritten line must replace the original rather than precede it, and the
# " ! SFAC ..." comment line must pass through untouched.
cat $SCRIPT |\
nawk -v sites=$SITES '$1=="SFAC"{print $1,$NF; next} $1=="UNIT"{print $1,sites*4; next}\
     /TREF/{print " ! resolution cutoff"; print " ! SHEL 99 4";\
      print "PATS";print "FIND",sites; print "MIND -1"; print "NTRY 20";next} {print}' |\
cat >! ${SHELX_dir}/shelxd.ins

# set up directories for alternative space groups
#
# For every entry of $otherSGs: make ${SHELX_dir}/<sg>/, write an ano.ins
# there that is the file named by $SCRIPT on entry with its SYMM lines (and
# the " ! <SG>" comment) swapped for those of <sg>, copy it to iso.ins and
# fh.ins, derive shelxd.ins, and link the parent directory's hkl files.
set ORIGINAL = "$SCRIPT"

foreach sg ( $otherSGs )
    set SCRIPT = ${SHELX_dir}/${sg}/ano.ins
    
    if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak
    if(-e "${SHELX_dir}/${sg}") then
	test -d "${SHELX_dir}/${sg}"
	if($status) then
	    # a non-directory is occupying the name: move the offending file
	    # and create the directory everything below is written into
	    mv ${SHELX_dir}/${sg} ${SHELX_dir}/${sg}.file
	    mkdir ${SHELX_dir}/${sg}
	endif
    else
	mkdir ${SHELX_dir}/${sg}
    endif
    
    # operators for this space group, tagged "newSYMM" so that the next
    # step can tell them from the SYMM lines already in the template
    cat $CLIBD/symop.lib ${tempfile}screwies |\
    nawk -v SG=$sg 'ops{print "newSYMM", $0; --ops}\
     $4 == SG  && $1 < 999 { ops=$2-1; getline}' >! ${tempfile}symops

    # replace SG with new one (sg)
    cat ${tempfile}symops "$ORIGINAL" |\
    nawk '/^newSYM/{++ops; op[ops]=substr($0, 4)} \
    /^SYMM/{for(i=1;i<=ops;++i){print op[i]}; while($0 ~ /^SYMM/){getline}}  ! /SYMM/' |\
    nawk -v SG=$SG -v sg=$sg '$2==SG{print " ! " sg} $2 != SG' |\
    cat >! $SCRIPT
    
    rm -f ${tempfile}symops
    
    # update other files
    cp ${SCRIPT} ${SHELX_dir}/${sg}/iso.ins
    cp ${SCRIPT} ${SHELX_dir}/${sg}/fh.ins

    # SHELXD variant: SFAC keeps only its last element, UNIT becomes
    # 4*$SITES, TREF gives way to the dual-space directives.  The SFAC and
    # UNIT rules test the first field and end with "next" so the rewritten
    # line replaces the original and the " ! SFAC ..." comment is untouched.
    cat $SCRIPT |\
    nawk -v sites=$SITES '$1=="SFAC"{print $1,$NF; next} $1=="UNIT"{print $1,sites*4; next}\
         /TREF/{print " ! resolution cutoff"; print " ! SHEL 99 4";\
          print "PATS";print "FIND",sites; print "MIND -1"; print "NTRY 20";next} {print}' |\
    cat >! ${SHELX_dir}/${sg}/shelxd.ins

    # share the reflection files of the parent directory
    set temp = `pwd`
    cd ${SHELX_dir}/${sg}
    ln -sf ../ano.hkl ano.hkl
    ln -sf ../iso.hkl iso.hkl
    ln -sf ../fh.hkl fh.hkl >& /dev/null
    cd $temp
    
end

rm -f ${tempfile}screwies >& /dev/null





##############################################################
# create jiffy for getting I/sigma of SHELX file(s)
#
# Writes ${SHELX_dir}/best_signal.com (existing copy renamed to *.bak unless
# $FRUGAL is set).  $nawk and ${tempfile} are substituted while generating;
# \$-escaped variables belong to the generated script.
#
# The generated script computes, for each hkl file named on its command
# line, the mean of column4/column5 over lines with a non-zero column 5,
# and copies the file with the highest mean to best.hkl.
#   - with no arguments it prints a usage line and exits 9, instead of
#     running cp without a source file
#   - a file with no usable lines scores 0, instead of dividing by zero and
#     leaving an unterminated line in the score table
#   - the table is sorted with "sort -k" rather than the obsolete "+1" form
##############################################################
set SCRIPT = "${SHELX_dir}/best_signal.com"
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak
cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#       get best signal of (one or more) shelx hkl files
#       and copy it to best.hkl
#
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set tempfile = ${tempfile}sig.
#
# nothing to compare without at least one hkl file
if(\$#argv == 0) then
    echo "usage: \$0 file1.hkl [file2.hkl ...]"
    exit 9
endif
#
echo -n "" >! \${tempfile}signal

# run through all input files
set i = 0
while(\$i < \$#argv)
    @ i = ( \$i + 1 )

    echo -n "\$argv[\$i] " >> \${tempfile}signal
    
    # get mean I/sigma of this shelx file (0 if it has no usable lines)
    cat \$argv[\$i] |\\
    nawk '\$5+0 != 0 {++n; sum+=\$4/\$5} END{if(n) print sum/n; else print 0}' |\\
    cat >> \${tempfile}signal
end

# get best, overall signal file
set best = \`sort -nr -k 2 \${tempfile}signal | nawk 'NR==1{print \$1}'\`
# copy it to new location
cp \$best best.hkl

rm -f \${tempfile}signal >& /dev/null

exit

EOF-script
chmod a+x $SCRIPT







##############################################################
# create SHELX hkl converter script
#
# Writes ${SHELX_dir}/mtz2SHELX.com (existing copy renamed to *.bak unless
# $FRUGAL is set) and makes it executable.  $nawk, ${tempfile} and $SCRIPT
# are substituted while generating; \$-escaped variables belong to the
# generated script.
#
# The generated script collects its existing *.mtz arguments (each new one
# is put in front of the list) and then:
#   - one file:  mtz2various OUTPUT SHELX with FSQUARED on DANO/SIGDANO,
#                written to SHELX_ano.hkl
#   - two files: cad merges IMEAN/SIGIMEAN of both as I1/I2, mtz2various
#                OUTPUT SHELX with FP=I2 and FPH=I1, written to SHELX_iso.hkl
#   - otherwise: prints usage and exits 9
# In both cases lines whose first field contains a capital letter, or whose
# fifth field is 0, are dropped and the mean of column4/column5 is reported.
# NOTE(review): the final "goto ReturnFromSetup" follows "exit 9" and is
# never reached; Setup always leaves through Dano, Diso or the exit.
##############################################################
set SCRIPT = ${SHELX_dir}/mtz2SHELX.com
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#  Conversion script to output I+ - I- from  an mtz  in SHELX format
#              OR output IMEAN - IMEAN from two mtzs in SHELX format
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set ano_outfile = "SHELX_ano.hkl"
set iso_outfile = "SHELX_iso.hkl"
#
set tempfile = ${tempfile}mtz2shelx
#
goto Setup
#######################################
#
#   usage: $SCRIPT [native.mtz] derivative.mtz
#
#######################################
ReturnFromSetup:

Dano:
echo "outputting anomalous differences from \$infiles in SHELX format as \$ano_outfile"

# SHELX anomalous
mtz2various hklin \$infiles \
            hklout \${tempfile}hkl << EOF-shelx1
OUTPUT SHELX
#LABIN FP=I(-) SIGFP=SIGI(-) FPH=I(+) SIGFPH=SIGI(+)
FSQUARED
LABIN DP=DANO SIGDP=SIGDANO
END
EOF-shelx1
if(\$status) exit 3

# strip off non-SHELX lines
cat \${tempfile}hkl | nawk '\$1 !~ /[A-Z]/ && \$5 != 0' >! \$ano_outfile

# print out mean DI/sigma
nawk '(NF == 6)&&(\$5 != 0){sum += \$4/\$5; ++n} END{if(n) print "Dano/sig =", sum/n}' \$ano_outfile

goto cleanup

Diso:
# SHELX can also use isomorphous differences

echo "outputting isomorphous difference between \$infiles in SHELX format as \$iso_outfile"
echo "\$infiles had better be on the same scale! "

# do difference HKLs
cad HKLIN1 \$infiles[1]                     \
    HKLIN2 \$infiles[2]                     \
    HKLOUT \${tempfile}shelx.mtz  << eof-cad-shelx
LABI FILE 1 E1=IMEAN E2=SIGIMEAN
CTYP FILE 1 E1=J     E2=Q
LABO FILE 1 E1=I1    E2=SIGI1
        
LABI FILE 2 E1=IMEAN E2=SIGIMEAN
CTYP FILE 2 E1=J     E2=Q
LABO FILE 2 E1=I2    E2=SIGI2
eof-cad-shelx
if(\$status) exit 2

# Dump difference in SHELX format
mtz2various hklin \${tempfile}shelx.mtz     \
            hklout \${tempfile}hkl  << EOF-shelx2
OUTPUT SHELX
LABIN FP=I2 SIGFP=SIGI2 FPH=I1 SIGFPH=SIGI1
END
EOF-shelx2
if(\$status) exit 3

# strip off non-SHELX lines
cat \${tempfile}hkl | nawk '\$1 !~ /[A-Z]/ && \$5 != 0' >! \$iso_outfile

# print out mean DI/sigma
nawk '(NF == 6)&&(\$5 != 0){sum += \$4/\$5; ++n} END{if(n) print "Diso/sig =", sum/n}' \$iso_outfile


cleanup:
rm -f \${tempfile}shelx.mtz >& /dev/null
rm -f \${tempfile}hkl       >& /dev/null

exit

#####################################################################

Setup:
set infiles
foreach arg ( \$* )
    if((\$arg =~ *.mtz)&&(-e \$arg)) then
	if(\$arg =~ *.mtz) set infiles = \`echo \$arg \$infiles\`
    endif
end

if (\$#infiles == 1) then
    # do something to identify Is?
    goto Dano
endif

if (\$#infiles == 2) then
    # do something to identify Is?
    goto Diso
endif

# no files, exit
cat << EOF
usage: \$0 native.mtz derivative.mtz	(output DISO in \$iso_outfile)
       OR
       \$0 derivative.mtz		(output DANO in \$ano_outfile)
       
NOTE: NO scaling will be done.  
      Make sure your MTZs are on the same scale.

EOF
exit 9

goto ReturnFromSetup


EOF-script
chmod a+x $SCRIPT










##############################################################
# create SHELX atom converter script
#
# Writes $SHELX_dir/SHELX2pdb.com (existing copy renamed to *.bak unless
# $FRUGAL is set) and makes it executable.  Only $nawk is substituted while
# generating; \$-escaped variables belong to the generated script.
#
# The generated script takes a SHELX file and an optional output name.
# Unless the second argument ends in .pdb, the output name is the input's
# basename with .ins/.res/.lst removed and ".pdb" appended.  Lines with 7
# fields whose 6th field contains "0000" are written as fractional
# coordinates and converted with coordconv using the file's CELL line.
#
# The second argument is tested as "\$2" (quoted): it is optional, and an
# unquoted empty \$2 would vanish from the expression and leave csh with a
# malformed "if( !~ *.pdb)".
##############################################################
set SCRIPT = $SHELX_dir/SHELX2pdb.com
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
# converts SHELXL output into PDB format
#
#  B = 79*U^2
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#

if(! -e "\$1") then
    echo "usage: \$0 shelx.res [outfile.pdb]"
    exit
endif
if("\$2" !~ *.pdb) then
    set outfile = \$1
    set outfile = \`basename \$outfile .ins\`
    set outfile = \`basename \$outfile .res\`
    set outfile = \`basename \$outfile .lst\`".pdb"
else
    set outfile = \$2
endif



set CELL = \`nawk '\$1=="CELL"{print \$3, \$4, \$5, \$6, \$7, \$8}' \$1\`

cat \$1 |\
nawk '/[Ss][Ff][Aa][Cc]/{Ee=toupper(\$NF)} (NF==7 && \$6 ~ /0000/){++i; \
printf("%5d%10.5f%10.5f%10.5f%10.5f%5.2f   35%10d%2s   MTL M\n",\
i, \$3,\$4,\$5, 79*\$7*\$7, \$6-10, i, Ee)}' |\
cat >! temp\$\$.frac

coordconv xyzin temp\$\$.frac xyzout \$outfile << EOF >> temp\$\$.log
CELL \$CELL
INPUT FRAC
OUTPUT PDB ORTH 1
END
EOF
if(! \$status) then
    echo "PDB version of SHELX sites in \$1 is: \$outfile"
else
    cat temp\$\$.*
    
    echo "problem converting \$1 to \$outfile (see above for explanation)"
endif
rm -f temp\$\$.*

EOF-script
chmod a+x $SCRIPT


















###############################################################################
#
#	auto-run SHELX ?
#
###############################################################################
# Install a placeholder shelx.com, but only when none exists yet.
# The test uses the block form of "if": with the one-line form
# "if(expr) command >! file" csh performs the redirection even when expr is
# false, which would empty an existing shelx.com instead of preserving it.
set SCRIPT = $SHELX_dir/shelx.com
if(! -e $SCRIPT) then
cat << EOF-script >! $SCRIPT
#! /bin/csh -f
echo "not done, sorry"
EOF-script
endif
chmod a+x $SCRIPT










#
# Create the README file
#
# Writes ${SHELX_dir}/README.  The only variable in the here-document is
# $SHELXS, which is replaced by the path chosen earlier in this section
# (or by its placeholder if no shelxs was found).
set SCRIPT = "${SHELX_dir}/README"
cat << EOF-README >! $SCRIPT

So, how do you use SHELX?

If you are in a hurry, type this:
$SHELXS ano
and
$SHELXS iso

to locate heavy atom sites using your best anomalous differences, or 
your best dispersive differences (respectively).

------------------
Key concepts:
1)  Shelx needs an input file (ALWAYS *.ins) and an HKL file (ALWAYS *.hkl)
2)  The prefixes of these files MUST be the same
3)  Shelx produces two output files: prefix.res and prefix.lst
	prefix.res contains the atom sites
	prefix.lst is the "log" file
4)  the UNIT and ZERR lines in prefix.ins should indicate the expected number
	of metal sites in the CELL, not the ASU
	UNIT is for metal:protein scaling and
	ZERR controls site picking
5)  in prefix.lst, lower CFOMs are BETTER


------------------
How do I know if it worked?
1)  That's a good question, it can be hard to tell.
2)  a CFOM below 0.1 is really good.
3)  make sure your sites are not on special positions (s.o.f.==1)
4)  Calculate Patterson vectors from SHELX's sites, and 
    see if they are consistent with your Patterson peaks.


For more detailed information, go to the SHELX homepage at:

netscape http://shelx.uni-ac.gwdg.de/SHELX/

EOF-README

# flag that the SHELX setup has been done, then hand control back
set SHELX_SET_UP

goto Return_from_Setup_SHELX

























exit
Setup_XPLOR:
##############################################################

 #    #  #####   #        ####   #####
  #  #   #    #  #       #    #  #    #
   ##    #    #  #       #    #  #    #
   ##    #####   #       #    #  #####
  #  #   #       #       #    #  #   #
 #    #  #       ######   ####   #    #

##############################################################
# create XPLOR/CNS hkl converter script
#
# Writes ${XPLOR_dir}/mtz2XPLOR.com (existing copy renamed to *.bak unless
# $FRUGAL is set) and makes it executable.  $nawk, ${tempfile} and $SCRIPT
# are substituted while generating; \$-escaped variables belong to the
# generated script.
#
# The generated script takes the first existing *.mtz argument (exit 9 if
# there is none), runs mtz2various OUTPUT XPLOR on F/SIGF/DANO/SIGDANO/ISYM
# (exit 9 on failure) and writes XPLOR.fobs: a four-line header whose
# NREFlections value is the line count of the mtz2various output plus one,
# followed by that output.
##############################################################
set SCRIPT = ${XPLOR_dir}/mtz2XPLOR.com
if((-e "$SCRIPT")&&(! $?FRUGAL)) mv ${SCRIPT} ${SCRIPT}.bak

cat << EOF-script >! $SCRIPT
#! /bin/csh -f
#
#  Conversion script to output F+ and F- in XPLOR format for one wavelength
#
#
set nawk = $nawk
\$nawk 'BEGIN{print}' >& /dev/null
if(\$status) set nawk = awk
alias nawk \$nawk
#
set outfile = XPLOR.fobs
set tempfile = ${tempfile}mtz2xplor
#
#
foreach arg ( \$* )
    if((! \$?infile)&&(-e \$arg)) then
    if(\$arg =~ *.mtz) set infile = \$arg
    endif
end
if(! \$?infile) exit 9
################################
#
#   usage: $SCRIPT blah.mtz
#	only works on merged MTZs
#
################################
# this is the mtz2various format to use, you need 
mtz2various hklin \$infile hklout \${tempfile}fobs << EOF
OUTPUT XPLOR
LABIN FP=F SIGFP=SIGF DP=DANO SIGDP=SIGDANO ISYM=ISYM
END
EOF
if(\$status) exit 9

set NREF = \`wc \${tempfile}fobs | nawk '{print \$1 +1}'\`

echo " NREFlections= \$NREF"                                         >! \$outfile
echo " ANOMalous=TRUE"                                              >> \$outfile
echo " DECLare NAME=FOBS    DOMAin=RECIprocal   TYPE=COMP END"      >> \$outfile
echo " DECLare NAME=SIGMA   DOMAin=RECIprocal   TYPE=REAL END"      >> \$outfile
cat \${tempfile}fobs                                                 >> \$outfile

rm -f \${tempfile}fobs >& /dev/null
rm -f MTZ2VSCR >& /dev/null

EOF-script
chmod a+x $SCRIPT




#
# Make a README file
#
# Writes ${XPLOR_dir}/README.  The \${XPLOR} references in the text are
# escaped, so they appear literally as ${XPLOR} in the README rather than
# being substituted here.
set SCRIPT = "${XPLOR_dir}/README"
cat << EOF-README >! $SCRIPT

So, what's all this then?

    The files matching *.fobs are your merged F+ and F- data in 
X-PLOR/CNS format.  Don't see any files matching *.fobs?  Type 
"make XPLOR" and the Elves will do whatever it takes to create 
them.
    These files have been formatted to feed directly into X-PLOR 3.1's 
"mad_merge.inp" procedure, which should be located in the 
\${XPLOR}/tutorial/mad/ directory (where \${XPLOR} is wherever your 
system has X-PLOR installed.)

------------------
Briefly, mad_merge.inp uses the actual anomalous and dispersive
differences of a MAD data set to refine a protein structure.  
In this case, phase calculation is circumvented by refining 
calculated anomalous and dispersive differences directly against
F+ and F- of each wavelength.  JMH has not gotten this to work, 
but when he does, it will be implemented here.

For more detailed information, go to the X-PLOR homepage at:
netscape http://xplor.csb.yale.edu/


EOF-README

# flag that the XPLOR setup has been done, then hand control back
set XPLOR_SET_UP
goto Return_from_Setup_XPLOR















































exit
MTZsearch:
###############################################################################

 #    #   #####  ######           ####   ######    ##    #####    ####   #    #
 ##  ##     #        #           #       #        #  #   #    #  #    #  #    #
 # ## #     #       #             ####   #####   #    #  #    #  #       ######
 #    #     #      #                  #  #       ######  #####   #       #    #
 #    #     #     #              #    #  #       #    #  #   #   #    #  #    #
 #    #     #    ######           ####   ######  #    #  #    #   ####   #    #

###############################################################################
#
#   actively search for measured x-ray data
#
#   Walks the command-line arguments, then "." and the home directory, and
#   stops searching at the first directory tree that contains any *.mtz.
#   On success the list of files is left in ${tempfile}mtzs and their count
#   in $number_of_mtzs; $SEARCHED_FOR_MTZS is set either way before control
#   passes to GetMTZinfo.
#
###############################################################################

set number_of_mtzs = 0
#unset FIRSTIME

# start looking for MTZs
echo "Looking for raw data ..."
foreach place ( $* . ~/ )
    # make sure it's a directory
    # (anything that "ls -ld" does not report as a directory becomes empty)
    set place = `ls -lnd $place |& nawk '/^d/{print $NF}'`
    
    # squash known bad paths
    # ("set" is required here: a bare "place = ..." is run as a command
    #  named "place" and leaves the variable untouched)
    foreach baddie ( $baddirs )
	if("$place" =~ $baddie*) set place = ""
    end
    
    if((-e "$place")&&($number_of_mtzs == 0)) then
	# search this tree for some mtzs
	find $place -name '*.mtz' -print |&\
	nawk 'NF==1 && /.mtz$/' >&! ${tempfile}
	
	# see if we got anything
	set temp = `cat ${tempfile} | wc -l`
	if("$temp" > 0) then
	    # prepare to analyze these mtzs
	    mv ${tempfile} ${tempfile}mtzs
	    
	    # stop search after successful find
	    set number_of_mtzs = "$temp"
	endif
    endif
end

# remember that we have done this
set SEARCHED_FOR_MTZS

goto GetMTZinfo












































exit
Unwrap_Awk_Scripts:
################################################################################

 #    #  #    #  #    #  #####     ##    #####
 #    #  ##   #  #    #  #    #   #  #   #    #
 #    #  # #  #  #    #  #    #  #    #  #    #
 #    #  #  # #  # ## #  #####   ######  #####
 #    #  #   ##  ##  ##  #   #   #    #  #
  ####   #    #  #    #  #    #  #    #  #

################################################################################
#   Unwrap complex AWK programs used by Elves
################################################################################

cat << EOF-sequence >! ${tempfile}sequencer.awk
#! $nawk -f
#
#   Process/identify protein sequences in a text/pdb file
#   as > 20 consecutive, aa letters
#
#   plus a few other goodies, such as monoisotopic mass, identifying 
#   chemically unstable sequences, and common cleavage sites (using chop=yes)
#
#
BEGIN {

    # ---- pass 1: average atomic masses ----
    H  =  1.007947
    C  = 12.0111  
    N  = 14.006747
    O  = 15.999943
    P  = 30.973762
    S  = 32.0666  
    Cl = 35.45279 
    Se = 78.963   

    # every residue except proline shares the -NH-CH(R)-CO- backbone term;
    # each entry below is that term plus the side chain R
    backbone = 2*H+2*C+N+O
    aMass["G"] =  backbone + H
    aMass["A"] =  backbone + C+3*H
    aMass["V"] =  backbone + C+H + 2*(C+3*H)
    aMass["I"] =  backbone + C+H + C+2*H + 2*(C+3*H)
    aMass["L"] =  backbone + C+2*H + C+H + 2*(C+3*H)
    aMass["D"] =  backbone + C+2*H + C + 2*O  +H
    aMass["N"] =  backbone + C+2*H + C + O+N+2*H
    aMass["E"] =  backbone + C+2*H + C+2*H + C + 2*O  +H
    aMass["Q"] =  backbone + C+2*H + C+2*H + C + O+N+2*H
    aMass["R"] =  backbone + C+2*H + C+2*H + C+2*H + N + C + 2*(N+ 2*H)
    aMass["K"] =  backbone + C+2*H + C+2*H + C+2*H + C+2*H + N+2*H
    aMass["M"] =  backbone + C+2*H + C+2*H + S     + C+3*H
    aMass["C"] =  backbone + C+2*H + S+H
    aMass["S"] =  backbone + C+2*H + O+H
    aMass["T"] =  backbone + C+H   + O+H   + C+3*H
    aMass["H"] =  backbone + C+2*H + C     + C+H + N+H + C+H + N 
    aMass["W"] =  backbone + C+2*H + 3*C + 5*(C+H) + N+H
    aMass["F"] =  backbone + C+2*H + C +   5*(C+H)
    aMass["Y"] =  backbone + C+2*H + C +   5*(C+H) + O
    # proline carries one hydrogen less on its backbone
    aMass["P"] =  1*H+2*C+N+O+ 3*(C+2*H)

    # ---- pass 2: monoisotopic masses (most abundant isotope) ----
    # the element variables keep these values for the rest of the program
    H  = 1.007825 
    C  = 12.00000 
    N  = 14.003074
    O  = 15.994915
    P  = 30.973762
    S  = 31.972070
    Cl = 34.968852
    Se = 79.916520

    # same formulas as above, evaluated with the monoisotopic values
    backbone = 2*H+2*C+N+O
    iMass["G"] =  backbone + H
    iMass["A"] =  backbone + C+3*H
    iMass["V"] =  backbone + C+H + 2*(C+3*H)
    iMass["I"] =  backbone + C+H + C+2*H + 2*(C+3*H)
    iMass["L"] =  backbone + C+2*H + C+H + 2*(C+3*H)
    iMass["D"] =  backbone + C+2*H + C + 2*O  +H
    iMass["N"] =  backbone + C+2*H + C + O+N+2*H
    iMass["E"] =  backbone + C+2*H + C+2*H + C + 2*O  +H
    iMass["Q"] =  backbone + C+2*H + C+2*H + C + O+N+2*H
    iMass["R"] =  backbone + C+2*H + C+2*H + C+2*H + N + C + 2*(N+ 2*H)
    iMass["K"] =  backbone + C+2*H + C+2*H + C+2*H + C+2*H + N+2*H
    iMass["M"] =  backbone + C+2*H + C+2*H + S     + C+3*H
    iMass["C"] =  backbone + C+2*H + S+H
    iMass["S"] =  backbone + C+2*H + O+H
    iMass["T"] =  backbone + C+H   + O+H   + C+3*H
    iMass["H"] =  backbone + C+2*H + C     + C+H + N+H + C+H + N 
    iMass["W"] =  backbone + C+2*H + 3*C + 5*(C+H) + N+H
    iMass["F"] =  backbone + C+2*H + C +   5*(C+H)
    iMass["Y"] =  backbone + C+2*H + C +   5*(C+H) + O
    iMass["P"] =  1*H+2*C+N+O+ 3*(C+2*H)
    # two non-residue entries that exist only in the monoisotopic table
    iMass["formyl"]=  C + O
    iMass["p"] =  P + 3*O
}

# the first ATOM record switches the program into PDB mode for good
/^ATOM / {pdb = 1}

# in PDB mode every alpha-carbon record contributes one residue letter
pdb && substr(\$0,12,5) == "  CA " {

    # build the three-letter to one-letter table the first time through
    if(! oneLetterReady)
    {
	nCodes = split("ALA CYS ASP GLU PHE GLY HIS ILE LYS LEU MET MSE ASN PRO GLN ARG SER THR VAL TRP TYR", code3)
	split("A C D E F G H I K L M M N P Q R S T V W Y", code1)
	for(k=1;k<=nCodes;++k) oneLetter[code3[k]] = code1[k]
	oneLetterReady = 1
    }

    # fixed-column fields of the record
    Restype = substr(\$0, 18, 3)
    Segid   = substr(\$0, 22, 1)
    Resnum  = substr(\$0, 23, 4)+0

    # a new segment ID or a gap in the numbering starts a new chain
    if((Segid != lastSegid)||(nextResnum != Resnum)) {
	seq = seq " "
	lastSegid = Segid
    }
    nextResnum = Resnum +1

    # residue types missing from the table add nothing to the sequence
    if(Restype in oneLetter) seq = seq oneLetter[Restype]
}

# once in PDB mode, no line reaches the free-text rules
pdb {next}

# free text: scan lines longer than 9 characters (or any line once seq is
# no longer empty) for amino-acid letters; lines beginning with ">" are
# left out
(length(\$0) > 9 || seq != "") && ! /^>/ {
#{
    # rebuild the line from its fields, which removes leading spaces
    line = ""
    for(i=1;i<NF;++i) line = line \$i " " 
    line = line \$NF    

    # scan for aa letters
    for(i=1;i<=length(line);++i)
    {
	c = toupper(substr(line, i, 1));
	# empty out quotes and tabs
	if(c == "\\"") c = ""
	if(c == "\\t") c = ""

	# Only the letters A-Y other than B, J, O and U are kept; anything
	# else, including the characters emptied above, becomes a blank and
	# so ends the current run of letters.
	# The class used to be written with commas between its members,
	# which made the comma itself count as a residue letter.
	if(c !~ /[AC-IK-NP-TV-Y]/)
	{
	    c = " "
	}
	seq = seq c 
    }
}

# blank lines terminate a sequence
NF==0 {
    seq = seq " "
}

# Report every chain found in the input: mass, sequence, residue counts,
# absorbance estimate, chemically unstable sites and (when chop is set)
# the fragment masses produced by common cleavage reagents.
END{

if(debug) print seq

# break up strings of protein letters into "words"
num = split(seq, sequence)

# only runs of at least 20 letters are reported
for(n=1;n<=num;++n)
    if(length(sequence[n]) >= 20)
    {
	# look for all the horrible things that can happen to the peptide
	# (every list is cleared here, under the name the loop below really
	#  uses, so that no cut site leaks in from an earlier chain)
	acid = ""
	base = ""
	race = ""
	pyroQ = ""
	CNBr = ""

	factorXa = ""
	chymotrypsin = ""
	endoDN = ""
	endoKC = ""
	thrombin = ""
	trypsin = ""
	pepsin = ""
	V8 = ""
	V82 = ""

	# weigh this chain
	# NOTE(review): weight and mass are both summed from aMass and the
	# iMass table is never read anywhere in this program -- confirm
	# whether mass was meant to use iMass
	weight = Met = His = Cys = A280 = "";
	weight = O + 3*H;
	mass   = O + 3*H;
	for(i=1;i<=length(sequence[n]);++i)
	{
	    c = substr(sequence[n], i, 1);
	    
	    # weigh this chain
	    weight += aMass[c];
	    mass   += aMass[c];
	
	    # count potentially derivatized residues
	    if(c == "M") ++Met;
	    if(c == "C") ++Cys;
	    if(c == "H") ++His;
	    
	    # add up (denatured) extinction coefficient
	    if(c == "W") A280 += 5600
	    if(c == "Y") A280 += 1400
	    if(c == "F") A280 += 197
	    
	    # chemical instabilities  (add up single-cleavage MWs)
	    if(c == "M" )                       CNBr = CNBr " " mass
	    if(substr(sequence[n],i,2) == "DP") acid = acid " " mass
	    if(substr(sequence[n],i,2) == "NG") base = base " " mass
	    if(substr(sequence[n],i,2) == "NG") race = race ", " i
	    if(substr(sequence[n],1,1) == "Q") pyroQ = 1
	    if((substr(sequence[n],1,1) == "M")&&(substr(sequence[n],2,1) == "Q")) pyroQ = 2

	    # proteolytic recognition sites (add up single-cleavage MWs)?
	    if(substr(sequence[n],i-3,4) == "IEGR") factorXa = factorXa  " " mass
	    if(substr(sequence[n],i+1,1) == "D") endoDN = endoDN " " mass
	    if(c ~ /[Y,F,W]/)     chymotrypsin = chymotrypsin    " " mass
#	    if(c ~ /[L,M,A,N,E]/) chymotrypsin = chymotrypsin    " " mass "*"
	    if(c ~ /[K]/)               endoKC = endoKC          " " mass
	    if(c ~ /[R]/)             thrombin = thrombin        " " mass
	    if(c ~ /[R,K]/)            trypsin = trypsin         " " mass
	    if(c ~ /[F,L]/)             pepsin = pepsin          " " mass
#	    if(c ~ /[Y,W,I,M]/)         pepsin = pepsin          " " mass "*"
	    if(c ~ /[E]/)                   V8 = V8              " " mass
	    if(c ~ /[E]/)                  V82 = V82             " " mass
	    if(c ~ /[D]/)                  V82 = V82             " " mass
	}
	
	# finish off cleavages
	# (each list ends with the mass of the whole chain, so the last
	#  difference printed is the final fragment)
	acid = acid " " mass
	base = base " " mass
	CNBr = CNBr " " mass

	factorXa = factorXa         " " mass
	chymotrypsin = chymotrypsin " " mass
	endoDN = endoDN             " " mass
	endoKC = endoKC             " " mass
	thrombin = thrombin         " " mass
	trypsin = trypsin           " " mass
	pepsin = pepsin             " " mass
	V8 = V8                     " " mass
	V82 = V82                   " " mass
	
	
	# we have found an acceptable protein sequence
	print mass " Da chain: "
	l=length(sequence[n])
	while(length(sequence[n]) > 0)
	{
	    # actually print out sequence here, 80 letters per line
	    print substr(sequence[n], 1, 80)
	    sequence[n] = substr(sequence[n], 81)
	}
	print ""
	print l "aa"
	print Met+0 "met"
	print Cys+0 "cys"
	print His+0 "his"
	print ""
	printf "denatured A(280nm) = %.4f*l*c (c in g/L)\\n", A280/weight
	# NOTE(review): M is never assigned in this program, so the next
	# line always prints 0.0 -- possibly Met was intended; confirm the
	# formula before changing it
	printf "    SeMET MAD Rano = %.1f%%\\n", 100*(M*8^2)/(7^2 * (weight/14))
	print ""
	
	# the heading appears when there is any acid or base site, or an
	# N-terminal glutamine to report
	f=split(acid base, Split)
	if((f>2) || pyroQ != "")
	{
	    print "Chemical Instabilities: "
	}
	show_fragments("acid (D*P):                       ", acid)
	if(show_fragments("base (N*G):                       ", base) > 1)
	{
	    print "racemization hazard at" substr(race,2)
	}
	if(pyroQ)
	{
	    print "residue " pyroQ " could form an N-cyclized glutamine "
	}

	if(chop)
	{
	    print ""
	    
	    show_fragments("CNBr (M*):                        ", CNBr)
	    print ""
	    print "Common proteases: "
	    show_fragments("factorXa (IEGR*):                 ", factorXa)
	    show_fragments("thrombin (R*):                    ", thrombin)
	    show_fragments("trypsin (R*, K*):                 ", trypsin)
	    show_fragments("endoproteinase Lys-C (K*):        ", endoKC)
	    show_fragments("endoproteinase Asp-N (*D):        ", endoDN)
	    show_fragments("chymotrypsin (W*,Y*,F*, +others): ", chymotrypsin)
	    show_fragments("pepsin (F*, L*, +others):         ", pepsin)
	    show_fragments("V8 protease (E*):                 ", V8)
	    show_fragments("V8 protease (E*,D*):              ", V82)
	}
    }
}

# Print one line of fragment masses for a list of cumulative masses.
#   label : text printed in front of the masses
#   cuts  : blank-separated running masses, the last one being the chain
# Nothing is printed unless the list holds at least one cut site besides
# the closing chain mass.  Returns the number of entries in the list.
function show_fragments(label, cuts,    piece, count, j)
{
    count = split(cuts, piece)
    if(count > 1)
    {
	printf "%s", label
	# piece[0] is unset and counts as zero, so the first difference is
	# the mass of the first fragment
	for(j=1;j<=count;++j) printf "%s", (piece[j] - piece[j-1]) " "
	print ""
    }
    return count
}

EOF-sequence
chmod a+x ${tempfile}sequencer.awk

















cat << EOF-elements >! ${tempfile}elements.awk
#! $nawk -f
#
#	General-purpose database for equating elements
#	with their x-ray absorption edges (and f', f" values)
#
#	if the first word on the line is an element symbol or name,
#	    an x-ray spectrum for that element will be displayed
#
#	if the first thing on the line is a number (energy or wavelength)
#	    the element with an edge closest to that energy will be displayed
#
#	if the line is an element and a number (energy or wavelength)
#	    the edge of the provided element closest to that energy will be given.
#
#	nothing gets printed if no match is found
#
BEGIN{
# element[Z] holds "Sym Name" for atomic numbers 1 to 103.
# The first three characters (the symbol padded with blanks) are what the
# rules below use as the leading part of the keys of F[], so the padding
# inside these strings matters.
#   at no      Sym Name
element[1]   = "H  Hydrogen     "
element[2]   = "He Helium       "
element[3]   = "Li Lithium      "
element[4]   = "Be Beryllium    "
element[5]   = "B  Boron        "
element[6]   = "C  Carbon       "
element[7]   = "N  Nitrogen     "
element[8]   = "O  Oxygen       "
element[9]   = "F  Fluorine     "
element[10]  = "Ne Neon         "
element[11]  = "Na Sodium       "
element[12]  = "Mg Magnesium    "
element[13]  = "Al Aluminum     "
element[14]  = "Si Silicon      "
element[15]  = "P  Phosphorus   "
element[16]  = "S  Sulfur       "
element[17]  = "Cl Chlorine     "
element[18]  = "Ar Argon        "
element[19]  = "K  Potassium    "
element[20]  = "Ca Calcium      "
element[21]  = "Sc Scandium     "
element[22]  = "Ti Titanium     "
element[23]  = "V  Vanadium     "
element[24]  = "Cr Chromium     "
element[25]  = "Mn Manganese    "
element[26]  = "Fe Iron         "
element[27]  = "Co Cobalt       "
element[28]  = "Ni Nickel       "
element[29]  = "Cu Copper       "
element[30]  = "Zn Zinc         "
element[31]  = "Ga Gallium      "
element[32]  = "Ge Germanium    "
element[33]  = "As Arsenic      "
element[34]  = "Se Selenium     "
element[35]  = "Br Bromine      "
element[36]  = "Kr Krypton      "
element[37]  = "Rb Rubidium     "
element[38]  = "Sr Strontium    "
element[39]  = "Y  Yttrium      "
element[40]  = "Zr Zirconium    "
element[41]  = "Nb Niobium      "
element[42]  = "Mo Molybdenum   "
element[43]  = "Tc Technetium   "
element[44]  = "Ru Ruthenium    "
element[45]  = "Rh Rhodium      "
element[46]  = "Pd Palladium    "
element[47]  = "Ag Silver       "
element[48]  = "Cd Cadmium      "
element[49]  = "In Indium       "
element[50]  = "Sn Tin          "
element[51]  = "Sb Antimony     "
element[52]  = "Te Tellurium    "
element[53]  = "I  Iodine       "
element[54]  = "Xe Xenon        "
element[55]  = "Cs Cesium       "
element[56]  = "Ba Barium       "
element[57]  = "La Lanthanum    "
element[58]  = "Ce Cerium       "
element[59]  = "Pr Praseodymium "
element[60]  = "Nd Neodymium    "
element[61]  = "Pm Promethium   "
element[62]  = "Sm Samarium     "
element[63]  = "Eu Europium     "
element[64]  = "Gd Gadolinium   "
element[65]  = "Tb Terbium      "
element[66]  = "Dy Dysprosium   "
element[67]  = "Ho Holmium      "
element[68]  = "Er Erbium       "
element[69]  = "Tm Thulium      "
element[70]  = "Yb Ytterbium    "
element[71]  = "Lu Lutetium     "
element[72]  = "Hf Hafnium      "
element[73]  = "Ta Tantalum     "
element[74]  = "W  Tungsten     "
element[75]  = "Re Rhenium      "
element[76]  = "Os Osmium       "
element[77]  = "Ir Iridium      "
element[78]  = "Pt Platinum     "
element[79]  = "Au Gold         "
element[80]  = "Hg Mercury      "
element[81]  = "Tl Thallium     "
element[82]  = "Pb Lead         "
element[83]  = "Bi Bismuth      "
element[84]  = "Po Polonium     "
element[85]  = "At Astatine     "
element[86]  = "Rn Radon        "
element[87]  = "Fr Francium     "
element[88]  = "Ra Radium       "
element[89]  = "Ac Actinium     "
element[90]  = "Th Thorium      "
element[91]  = "Pa Protactinium "
element[92]  = "U  Uranium      "
element[93]  = "Np Neptunium    "
element[94]  = "Pu Plutonium    "
element[95]  = "Am Americium    "
element[96]  = "Cm Curium       "
element[97]  = "Bk Berkelium    "
element[98]  = "Cf Californium  "
element[99]  = "Es Einsteinium  "
element[100] = "Fm Fermium      "
element[101] = "Md Mendelevium  "
element[102] = "No Nobelium     "
element[103] = "Lr Lawrencium   "

# x-ray edge information (condensed to "interesting" points)
#
# key   : the three-character symbol immediately followed by a small
#         integer (the rules below try the integers 1 to 39 for a symbol)
# value : atomic number, symbol, name, energy, two more numeric columns
#         (presumably f' and f" at that energy -- TODO confirm) and the
#         word "edge", which the nearest-edge search requires
# Rows made of "-" are placeholders for elements without tabulated data.
# The energy column is compared directly with what isenergy() returns
# (presumably eV, given its wavelength conversion constant -- confirm).
F["H  "  1] = "1   H  Hydrogen         -     -       -    "
F["He "  1] = "2   He Helium           -     -       -    "
F["Li "  1] = "3   Li Lithium          -     -       -    "
F["Be "  1] = "4   Be Beryllium        -     -       -    "
F["B  "  1] = "5   B  Boron            -     -       -    "
F["C  "  1] = "6   C  Carbon           -     -       -    "
F["N  "  1] = "7   N  Nitrogen         -     -       -    "
F["O  "  1] = "8   O  Oxygen           -     -       -    "
F["F  "  1] = "9   F  Fluorine         -     -       -    "
F["Ne "  1] = "10  Ne Neon             -     -       -    "
F["Na "  1] = "11  Na Sodium           -     -       -    "
F["Mg "  1] = "12  Mg Magnesium        -     -       -    "
F["Al "  1] = "13  Al Aluminum         -     -       -    "
F["Si "  1] = "14  Si Silicon          -     -       -    "
F["P  "  3] = "15  P  Phosphorus    2148   -7.423   4.112 edge"
F["S  "  3] = "16  S  Sulfur        2475   -7.264   4.104 edge"
F["Cl "  3] = "17  Cl Chlorine      2825   -7.487   4.091 edge"
F["Ar "  4] = "18  Ar Argon         3205   -7.788   4.074 edge"
F["K  "  4] = "19  K  Potassium     3610   -7.584   4.063 edge"
F["Ca "  6] = "20  Ca Calcium       4041   -7.499   4.053 edge"
F["Sc "  6] = "21  Sc Scandium      4495   -7.856   4.030 edge"
F["Ti "  6] = "22  Ti Titanium      4969   -7.703   4.008 edge"
F["V  "  7] = "23  V  Vanadium      5468   -7.623   3.995 edge"
F["Cr "  7] = "24  Cr Chromium      5992   -7.701   3.973 edge"
F["Mn "  8] = "25  Mn Manganese     6542   -7.670   3.962 edge"
F["Fe "  9] = "26  Fe Iron          7115   -7.712   3.950 edge"
F["Co "  9] = "27  Co Cobalt        7711   -8.149   3.939 edge"
F["Ni " 10] = "28  Ni Nickel        8335   -8.132   3.920 edge"
F["Cu " 10] = "29  Cu Copper        8981   -8.209   3.901 edge"
F["Zn " 11] = "30  Zn Zinc          9661   -8.112   3.896 edge"
F["Ga " 12] = "31  Ga Gallium      10370   -7.949   3.892 edge"
F["Ge " 13] = "32  Ge Germanium    11106   -7.981   3.881 edge"
F["As " 13] = "33  As Arsenic      11869   -8.255   3.865 edge"
F["Se " 14] = "34  Se Selenium     12660   -8.320   3.846 edge"
F["Br " 15] = "35  Br Bromine      13476   -8.289   3.826 edge"
F["Kr " 16] = "36  Kr Krypton      14328   -8.256   3.806 edge"
F["Rb " 17] = "37  Rb Rubidium     15200  -10.436   3.787 edge"
F["Sr " 18] = "38  Sr Strontium    16107   -8.266   3.774 edge"
F["Y  " 19] = "39  Y  Yttrium      17041   -8.191   3.754 edge"
F["Zr " 19] = "40  Zr Zirconium    18000   -8.283   3.736 edge"
F["Nb " 20] = "41  Nb Niobium      18988   -8.288   3.714 edge"
F["Mo " 21] = "42  Mo Molybdenum   20000   -9.878   3.698 edge"
F["Tc " 23] = "43  Tc Technetium   21046   -8.489   3.680 edge"
F["Ru " 24] = "44  Ru Ruthenium    22120   -8.164   3.665 edge"
F["Rh " 25] = "45  Rh Rhodium      23222   -8.459   3.650 edge"
F["Pd " 26] = "46  Pd Palladium    24353   -8.219   3.633 edge"
F["Ag "  4] = "47  Ag Silver        3354  -20.966  10.518 edge"
F["Ag "  5] = "47  Ag Silver        3526  -16.055  13.800 edge"
F["Ag "  6] = "47  Ag Silver        3808  -10.003  14.611 edge"
F["Cd "  4] = "48  Cd Cadmium       3540  -21.398  10.767 edge"
F["Cd "  5] = "48  Cd Cadmium       3730  -15.432  13.792 edge"
F["Cd "  7] = "48  Cd Cadmium       4021   -9.699  14.245 edge"
F["In "  4] = "49  In Indium        3733  -21.152  11.048 edge"
F["In "  5] = "49  In Indium        3941  -15.141  13.784 edge"
F["In "  7] = "49  In Indium        4240   -9.690  13.866 edge"
F["Sn "  6] = "50  Sn Tin           4159  -14.958  13.683 edge"
F["Sb "  6] = "51  Sb Antimony      4135  -21.027  11.070 edge"
F["Sb "  8] = "51  Sb Antimony      4383  -14.902  13.571 edge"
F["Te "  6] = "52  Te Tellurium     4344  -21.057  11.006 edge"
F["Te "  8] = "52  Te Tellurium     4615  -14.559  13.442 edge"
F["I  "  6] = "53  I  Iodine        4560  -20.634  10.936 edge"
F["I  "  8] = "53  I  Iodine        4855  -14.434  13.310 edge"
F["Xe "  6] = "54  Xe Xenon         4785  -20.563  10.874 edge"
F["Xe "  9] = "54  Xe Xenon         5106  -14.540  13.185 edge"
F["Cs "  7] = "55  Cs Cesium        5014  -21.106  10.876 edge"
F["Cs "  9] = "55  Cs Cesium        5362  -14.233  13.120 edge"
F["Ba "  7] = "56  Ba Barium        5250  -20.109  10.809 edge"
F["Ba "  9] = "56  Ba Barium        5626  -14.171  12.992 edge"
F["La "  7] = "57  La Lanthanum     5485  -20.405  10.570 edge"
F["La "  9] = "57  La Lanthanum     5893  -14.030  12.840 edge"
F["Ce "  7] = "58  Ce Cerium        5726  -20.237  10.734 edge"
F["Ce " 10] = "58  Ce Cerium        6167  -13.634  12.698 edge"
F["Pr "  7] = "59  Pr Praseodymium  5967  -20.069  10.711 edge"
F["Pr " 10] = "59  Pr Praseodymium  6443  -13.610  12.615 edge"
F["Nd "  8] = "60  Nd Neodymium     6210  -20.540  10.661 edge"
F["Nd " 10] = "60  Nd Neodymium     6724  -13.655  12.624 edge"
F["Pm "  8] = "61  Pm Promethium    6462  -19.898  10.625 edge"
F["Pm " 11] = "61  Pm Promethium    7015  -13.710  12.558 edge"
F["Sm "  8] = "62  Sm Samarium      6719  -19.794  10.651 edge"
F["Sm " 11] = "62  Sm Samarium      7314  -13.591  12.442 edge"
F["Eu "  8] = "63  Eu Europium      6979  -20.382  10.648 edge"
F["Eu " 11] = "63  Eu Europium      7620  -13.183  12.345 edge"
F["Gd "  9] = "64  Gd Gadolinium    7245  -20.213  10.609 edge"
F["Gd " 11] = "64  Gd Gadolinium    7933  -13.095  12.185 edge"
F["Tb "  9] = "65  Tb Terbium       7517  -19.488  10.612 edge"
F["Tb " 12] = "65  Tb Terbium       8254  -13.276  12.233 edge"
F["Dy "  9] = "66  Dy Dysprosium    7793  -19.495  10.588 edge"
F["Dy " 12] = "66  Dy Dysprosium    8583  -13.206  12.196 edge"
F["Ho " 10] = "67  Ho Holmium       8074  -19.458  10.591 edge"
F["Ho " 12] = "67  Ho Holmium       8920  -13.268  12.188 edge"
F["Er " 10] = "68  Er Erbium        8360  -20.135  10.578 edge"
F["Er " 13] = "68  Er Erbium        9267  -12.986  12.157 edge"
F["Tm " 10] = "69  Tm Thulium       8651  -19.330  10.573 edge"
F["Tm " 13] = "69  Tm Thulium       9619  -13.222  12.154 edge"
F["Yb " 10] = "70  Yb Ytterbium     8946  -19.795  10.547 edge"
F["Yb " 13] = "70  Yb Ytterbium     9981  -12.747  11.816 edge"
F["Lu " 11] = "71  Lu Lutetium      9247  -19.360  10.547 edge"
F["Lu " 14] = "71  Lu Lutetium     10351  -12.855  11.722 edge"
F["Hf " 11] = "72  Hf Hafnium       9563  -19.843  10.527 edge"
F["Hf " 14] = "72  Hf Hafnium      10742  -12.733  11.655 edge"
F["Ta " 11] = "73  Ta Tantalum      9884  -19.294  10.511 edge"
F["Ta " 15] = "73  Ta Tantalum     11139  -12.418  11.449 edge"
F["W  " 12] = "74  W  Tungsten     10209  -19.840  10.496 edge"
F["W  " 15] = "74  W  Tungsten     11547  -12.327  11.384 edge"
F["Re " 12] = "75  Re Rhenium      10538  -19.058  10.226 edge"
F["Re " 15] = "75  Re Rhenium      11961  -12.563  11.463 edge"
F["Os " 12] = "76  Os Osmium       10873  -19.529  10.214 edge"
F["Os " 16] = "76  Os Osmium       12388  -12.224  11.392 edge"
F["Ir " 13] = "77  Ir Iridium      11218  -18.898  10.201 edge"
F["Ir " 16] = "77  Ir Iridium      12827  -12.200  11.311 edge"
F["Pt " 13] = "78  Pt Platinum     11566  -19.273  10.202 edge"
F["Pt " 17] = "78  Pt Platinum     13275  -12.348  11.248 edge"
F["Au " 13] = "79  Au Gold         11921  -19.230  10.206 edge"
F["Au " 17] = "79  Au Gold         13736  -12.285  11.152 edge"
F["Hg " 14] = "80  Hg Mercury      12286  -19.370  10.191 edge"
F["Hg " 18] = "80  Hg Mercury      14211  -12.270  11.078 edge"
F["Tl " 14] = "81  Tl Thallium     12660  -18.943  10.157 edge"
F["Tl " 18] = "81  Tl Thallium     14700  -12.289  11.003 edge"
F["Pb " 15] = "82  Pb Lead         13038  -18.670  10.135 edge"
F["Pb " 19] = "82  Pb Lead         15203  -11.882  10.933 edge"
F["Bi " 15] = "83  Bi Bismuth      13421  -18.941  10.125 edge"
F["Bi " 19] = "83  Bi Bismuth      15714  -11.873  10.838 edge"
F["Po " 15] = "84  Po Polonium     13816  -19.063  10.103 edge"
F["Po " 20] = "84  Po Polonium     16247  -11.883  10.757 edge"
F["At " 16] = "85  At Astatine     14216  -18.784  10.081 edge"
F["At " 20] = "85  At Astatine     16787  -12.007  10.679 edge"
F["Rn " 16] = "86  Rn Radon        14622  -18.663  10.054 edge"
F["Rn " 21] = "86  Rn Radon        17340  -11.749  10.609 edge"
F["Fr " 17] = "87  Fr Francium     15034  -18.475  10.029 edge"
F["Fr " 21] = "87  Fr Francium     17909  -11.846  10.524 edge"
F["Ra " 17] = "88  Ra Radium       15447  -18.590  10.015 edge"
F["Ra " 22] = "88  Ra Radium       18487  -11.732  10.428 edge"
F["Ac " 17] = "89  Ac Actinium     15874  -18.287   9.988 edge"
F["Ac " 23] = "89  Ac Actinium     19086  -11.670  10.341 edge"
F["Th "  6] = "90  Th Thorium       4049  -23.021  31.611 edge"
F["Th " 20] = "90  Th Thorium      16303  -18.447   9.968 edge"
F["Th " 25] = "90  Th Thorium      19696  -11.637  10.266 edge"
F["Pa "  6] = "91  Pa Protactinium  4176  -22.092  33.098 edge"
F["Pa " 20] = "91  Pa Protactinium 16736  -18.320   9.964 edge"
F["Pa " 26] = "91  Pa Protactinium 20316  -11.818  10.184 edge"
F["U  "  6] = "92  U  Uranium       4306  -20.788  35.007 edge"
F["U  " 21] = "92  U  Uranium      17169  -18.441   9.957 edge"
F["U  " 26] = "92  U  Uranium      20950  -11.751  10.094 edge"
F["Np "  1] = "93  Np Neptunium        -     -       -    "
F["Pu "  1] = "94  Pu Plutonium        -     -       -    "
F["Am "  1] = "95  Am Americium        -     -       -    "
F["Cm "  1] = "96  Cm Curium           -     -       -    "
F["Bk "  1] = "97  Bk Berkelium        -     -       -    "
F["Cf "  1] = "98  Cf Californium      -     -       -    "
F["Es "  1] = "99  Es Einsteinium      -     -       -    "
F["Fm "  1] = "100 Fm Fermium          -     -       -    "
F["Md "  1] = "101 Md Mendelevium      -     -       -    "
F["No "  1] = "102 No Nobelium         -     -       -    "
F["Lr "  1] = "103 Lr Lawrencium       -     -       -    "

}

# Line starts with an element symbol or name.
#   with a usable energy/wavelength as second word: print the table row
#       of that element lying closest to the energy
#   otherwise: print every table row of that element
Z = iselement(\$1) {
    # element symbol or name given
    # (Ee is the three-character prefix used in the keys of F)
    Ee = substr(element[Z], 1, 3)
    
    if(energy = isenergy(\$2))
    {
	# print edge f' and f'' closest to this energy
    
	bestdist = 9999999
	# start clean so a row found for an earlier input line is not reused
	entry = ""
	for(i=1;i<40;++i)
	{
	    # Indices without a table row must not take part: an empty row
	    # reads as energy zero and would beat the real edges whenever
	    # the requested energy is below half of the nearest edge.
	    if(F[Ee i] == "") continue

	    split(F[Ee i], e)
	    dist = sqrt((e[4] - energy)^2)

 	    if(dist < bestdist)
	    {
		bestdist = dist
		entry=F[Ee i];
	    }
	}
	if(entry != "") print entry;
    }
    else
    {
	# print out full "spectrum" for this element
	for(i=1;i<40;++i)
	{
	    if(F[Ee i]) print F[Ee i]
	}
    }
}


# line starts with an energy or wavelength: report the table row whose
# edge lies nearest to it
energy = isenergy(\$1) {

    closest = 99999999
    for(key in F)
    {
	nword = split(F[key],col)

	# rows that do not end in the word "edge" take no part
	if(col[nword] != "edge") continue

	gap = sqrt((col[4] - energy)^2)

	# a nearer edge wins; on a tie the lower atomic number wins
	if((gap < closest)||((gap == closest)&&(col[1] < winner+0)))
	{
	    closest = gap
	    winner = F[key]
	}
    }
    print winner
}

# the literal request "all elements" dumps every table row, walking the
# elements in order of atomic number
\$0 == "all elements" {
    Z = 1
    while(Z < 104)
    {
	Ee = substr(element[Z], 1, 3)
	i = 1
	while(i < 40)
	{
	    if(F[Ee i]) print F[Ee i];
	    ++i
	}
	++Z
    }
}

# Interpret a word as an x-ray energy.
#   positive values below 1000 are treated as wavelengths and converted
#   with 12398.4245/number; everything else is taken as it stands.
# Returns the energy when it lies between 1000 and 25000, otherwise 0.
# The result is also left in the global variable energy.
function isenergy(number)
{
    if(number > 0 && number < 1000 && number+0>0)
    {
	energy = 12398.4245/number
    }
    else
    {
	energy = number
    }

    # out-of-range values are rejected
    if(energy < 1000) energy = 0
    else if(energy > 25000) energy = 0

    return energy
}

function iselement(string)
{
    # returns the index into element[] whose entry matches the string,
    # or 0 when nothing matches ("reply", "z" and "w" are globals)
    reply = 0
    for(z in element)
    {
	split(element[z], w)

	# first word of the entry must match exactly (case sensitive) ...
	if(string == w[1]) { reply = z; break }

	# ... the second word is compared without regard to case
	if(tolower(string) == tolower(w[2])) { reply = z; break }
    }

    return reply
}
EOF-elements
chmod a+x ${tempfile}elements.awk









cat << EOF-ginger >! ${tempfile}ginger.awk
#! $nawk -f
#
#   Ginger:  an English to Elvish(TM) translator
#
#   Process/identify crystallographic parameters from
#	free-form (english) input, 
#   and print them out in MOSFLMish format
#
#   The parameter keyword will always be printed if a parameter is mentioned, 
#   but the value may or may not be given.  The parent program is expected
#   to ask the user for these values explicitly
#
BEGIN{
    line = ""
}

# preprocess
{
    # Rebuild the input record one character at a time into "line",
    # inserting blanks so that a later split() yields clean words.
    line = ""
    # remove weird characters
    for(i=1;i<=length(\$0);++i)
    {
	c = substr(\$0,i,1);
	
	# separate stuff from punctuation
	# (anything that is not a letter, digit, period or minus gets a blank on each side)
	if(c !~ /[a-zA-Z0-9.\\-]/) c = " " c " "
	# periods (not decimals)
	# a period followed by something other than a letter, digit, period or minus is dropped
	if((c == ".") && (substr(\$0,i+1,1) !~ /[a-zA-Z0-9.\\-]/)) c = ""
	# commas in numbers
	# NOTE(review): a comma has already been padded with blanks by the first
	# test above, so c can no longer equal a bare comma here and this test
	# never succeeds; commas always end up as separate words
	if((c == ",") && (substr(\$0,i-1,1) ~ /[0-9]/) && (substr(\$0,i+1,3) ~ /[0-9][0-9][0-9]/)) c = ""
	# other punctuation
	# (these characters were already padded above, so this only adds more blanks)
	if(c ~ /[\\"\\'\\\`=,;:~]/) c = " " c " "
#	if(c ~ /[,;:]/) c = " " c " "
	
	# separate numbers from letters/labels
	# (a digit or period followed by anything else gets a trailing blank)
	if((c ~ /[0-9.]/)&&(substr(\$0,i+1,1) !~ /[0-9.]/)) c = c " "
#	if((c ~ /[0-9.\\-]/)&&(substr(\$0,i-1,1) !~ /[0-9.\\-PpCcIiFfRrHh]/)) c = " " c

	# separate beginning of negative numbers
#	if((c == "-") && (substr(\$0,i+1,1) ~ /[0-9.]/)) c = " " c

	line = line c
    }
    
    if(debug) print "1: " line

    
    # now re-parse the line
    nf = split(tolower(line), w)
    line = ""
    

    ###########################################################
    #
    # resolve all numbers,  converting to standard units
    #
    ###########################################################
    # Walk over the words in w[1..nf].  Spelled-out or literal numbers are
    # converted to plain numbers, a following unit word is folded in (and may
    # tag the number with a keyword such as MASS, ENERGY or RESO), and the
    # result is appended to "line".  Every other word is passed through as-is.
    for(i=1;i<=nf;++i)
    {
	word = w[i]
	num = ""
	
	# wordy people: keep reading words for as long as each one changes the total
	# "last" remembers what was read last:
	#    1 = a units or teens word,  10 = a tens word,  0 = nothing yet / a magnitude
	last = 0
	do{
	    oldnum = num
	    if(last != 1)
	    {
		if(w[i] == "one")       {num += 1 ; last = 1}
		if(w[i] == "two")       {num += 2 ; last = 1}
		if(w[i] == "three")     {num += 3 ; last = 1}
		if(w[i] == "four")      {num += 4 ; last = 1}
		if(w[i] == "five")      {num += 5 ; last = 1}
		if(w[i] == "six")       {num += 6 ; last = 1}
		if(w[i] == "seven")     {num += 7 ; last = 1}
		if(w[i] == "eight")     {num += 8 ; last = 1}
		if(w[i] == "nine")      {num += 9 ; last = 1}
	    }
	    if((last != 10)&&(last != 1))
	    {
		if(w[i] == "ten")       {num += 10; last = 1}
		if(w[i] == "eleven")    {num += 11; last = 1}
		if(w[i] == "twelve")    {num += 12; last = 1}
		if(w[i] == "thirteen")  {num += 13; last = 1}
		if(w[i] == "fourteen")  {num += 14; last = 1}
		if(w[i] == "fifteen")   {num += 15; last = 1}
		if(w[i] == "fifthteen") {num += 15; last = 1}
		if(w[i] == "sixteen")   {num += 16; last = 1}
		if(w[i] == "seventeen") {num += 17; last = 1}
		if(w[i] == "eighteen")  {num += 18; last = 1}
		if(w[i] == "eightteen") {num += 18; last = 1}
		if(w[i] == "ninteen")   {num += 19; last = 1}
		if(w[i] == "nineteen")  {num += 19; last = 1}

		# tens (common misspellings are accepted next to the correct form)
		if(w[i] == "twenty")    {num += 20; last=10}
		if(w[i] == "thirty")    {num += 30; last=10}
		if(w[i] == "forty")     {num += 40; last=10}
		if(w[i] == "fourty")    {num += 40; last=10}
		if(w[i] == "fifty")     {num += 50; last=10}
		if(w[i] == "sixty")     {num += 60; last=10}
		if(w[i] == "seventy")   {num += 70; last=10}
		if(w[i] == "eighty")    {num += 80; last=10}
		if(w[i] == "ninty")     {num += 90; last=10}
		if(w[i] == "ninety")    {num += 90; last=10}
	    }
	    
	    # orders of magnitude: multiply only the part of the total that is
	    # smaller than the magnitude ("one thousand two hundred" -> 1200)
	    if(last != 10)
	    {
		if(w[i] == "hundred")  {last=0; if(num == "") num = 1; num = (num - (num%100)) + 100*(num%100)}
		if(w[i] == "thousand") {last=0; if(num == "") num = 1; num = (num - (num%1000)) + 1000*(num%1000)}
		if(w[i] == "million")  {last=0; if(num == "") num = 1; num = (num - (num%1000000)) + 1000000*(num%1000000)}
	    }
	    ++i
	
	# an "and" in the middle of a number ("two hundred and five") does not end it
	} while ((num != oldnum)||((num+0 != 0)&&(w[i-1] == "and")))
	# the loop has looked one word too far: step back onto the word that
	# ended it, and (if a number was found) once more onto its last word
	--i
	if(num != "") --i
	
	# actual, real numbers
	if((w[i] ~ /[0-9]/)&&(w[i] !~ /[a-z]/)) num = w[i]+0
	
	if(num != "")
	{
	    # convert numbers: look at the word following the number as a possible unit
	    ++i
	    oldnum = num
	    
	    # magnitude word after the number ("5 million", "twenty thousand")
	    if((w[i] ~ /^million/)) num *= 1000000
	    if((w[i] ~ /^thousa/)) num *= 1000
	    if((w[i] ~ /^hundred/)) num *= 100
	
	    # percent
	    if((w[i] == "%")||(w[i] ~ /^per[c\\-]/))
	    {
		num /= 100
		# could be a solvent content
#		num = num " SOLVENT"
	    }
	    # cm to mm
	    if(w[i] == "cm") num *= 10
	    # um to mm
	    if((w[i] == "um")||(w[i] ~ /^micro/)) num /= 1000
	
	    # some units have unambiguous meaning
	
	    # molecular weights
	    if((w[i] == "kd")||(w[i] == "d")||(w[i] == "amu")||(w[i] ~ /^dalton/))
	    {
		if(w[i] ~ /^k/) num *= 1000
		num = "MASS " num
	    }
	    if((w[i] == "g")&&(w[i+1] == "/")&&(w[i+2] ~ /^mol/))
	    {
		if(num > 1000) num = "MASS " num
	    }
	    # a count of amino acids is turned into a mass at 120 per residue
	    if((w[i] == "aa")||(w[i] ~ /^amino/))
	    {
		num *= 120
		num = "MASS " num
	    }
	    
	    # x-ray energies
	    if((w[i] == "ev")||(w[i] == "kev")||(w[i] == "electronvolt"))
	    {
		if(w[i] == "kev") num *= 1000
		num = "ENERGY " num
	    }
	    
	    # angstrom units (important without explicit label)
	    # the prefix also matches the fully spelled-out word "angstrom"
	    if((w[i] == "a")||(w[i] == "\\305")||(w[i] ~ /^angs/))
	    {
		# if wavelength is mentioned, then the show is off
		if((\$0 !~ /wave/)&&(\$0 !~ /x-ray/)&&(num+0 <= 10)) num = "RESO " num
	    }

	    # this word might mean something else
	    # (nothing above changed the number, so give the word back)
	    if(num == oldnum) --i
	    
	    word = num
	    
	    # end of number interpreter
	}

	line = line " " word
    }
    
    if(debug) print "2: " line
    
    # now re-parse the line (again)
    nf = split(tolower(line), w)
    line = ""
    
    ###########################################################
    #
    #	reduce all words to a simplified vocabulary
    #	    (and detect "mentioning" of variables)
    #
    ###########################################################
    for(i=1;i<=nf;++i)
    {
#	word = "blah"
	word = ""
	# always interested in numbers
	if((w[i] ~ /[0-9]/)&&(w[i] !~ /[a-z]/))
	{
	    word = w[i]+0
	}
	
	# Crystal/sample properties
	
	# Space group
	# either the keyword itself ...
	if((w[i] == "sg")||(w[i] ~ /^space/))
	{
	    word = "SG"
	    SG = " "
	}
	# ... or something that looks like a space group symbol: a lattice letter
	# followed by a digit from 1 to 6.  The letter "h" is included so that
	# such symbols survive to the later stage that accepts [PpCcIiFfRrHh]
	if(w[i] ~ /^[pcifrh][1-6]/)
	{
	    word = toupper(w[i])
	    SG = " "
	}
	# unit cell
	if(w[i] == "cell")
	{
	    word = "CELL"
	}
	# molecular weight
	if((w[i] ~ /molec/)||(w[i] == "mass")||(w[i] ~ /^weigh/)||(w[i] ~ /^protein/)||(w[i] ~ /^peptide/)||(w[i] ~ /^chain/))
	{
	    word = "MASS"
	    if((w[i] !~ /^protein/)&&(w[i] !~ /^peptide/)&&(w[i] !~ /^chain/))
	    {
		MASS = " "
	    }
	}
	# asu could be given in "monomers"
	if((w[i] == "asu")||(w[i] ~ /^asymmetric/)||(w[i] ~ /^chain/)||(w[i] ~ /mer\$/))
	{
	    word = "ASU"
	    if(w[i] !~ /mer\$/)
	    {
		word = "ASU"
		ASU = " "
	    }
	    if(w[i] == "monomer")  word = "1 ASU"
	    if(w[i] == "dimer")    word = "2 ASU"
	    if(w[i] == "trimer")   word = "3 ASU"
	    if(w[i] == "tetramer") word = "4 ASU"
	    if(w[i] == "pentamer") word = "5 ASU"
	    if(w[i] == "hexamer")  word = "6 ASU"
	    if(w[i] == "heptamer") word = "7 ASU"
	    if(w[i] == "octamer")  word = "8 ASU"
	    if(w[i] == "nonamer")  word = "9 ASU"
	    if(w[i] == "decamer")  word = "10 ASU"
	}
	# solvent content
	if((w[i] ~ /solven/)||(w[i] == "content"))
	{
	    word = "SOLVENT"
	    SOLVENT = " "
	}
	if((w[i] ~ /matthew/)||(w[i] == "vm"))
	{
	    word = "VM"
	    VM = " "
	}
	# metal sites?
	if((w[i] == "site")||(w[i] == "sites")||(w[i] ~ /^metal/)||(w[i] ~ /^deriva/))
	{
	    word = "SITES"
	    if(w[i] !~ /^deriva/) SITES = " "
	}
	# resolution
	if((w[i] ~ /^reso/)||(w[i] ~ /^diffra/)||(w[i] == "res"))
	{
	    word = "RESO"
	    if(w[i] ~ /^reso/)
	    {
		RESO = " "
	    }
	    if(w[i-1] ~ /^low/)
	    {
		RESO = ""
#		word = "low " RESO
	    }
	}
	# mosaicity
	if(w[i] ~ /^mosaic/)
	{
	    word = "MOSAIC"
	}


	# X-ray properties

	# wavelength/energy
	if((w[i] ~ /^wave/)||(w[i] ~ /^x-ray/)||(w[i] ~ /^lambda/)||(w[i] ~ /^lamda/))
	{
	    word = "WAVE"
	    if(w[i] ~ /^wave/) WAVE = " "
	}
	if(w[i] ~ /^energ/)
	{
	    word = "ENERGY"
	    WAVE = " "
	}
	if((w[i] ~ /^disper/)||(w[i] ~ /^spectr/)||(w[i] ~ /^bandwidth/))
	{
	    word = "DISPERSION"
	    DISPER = " "
	}
	if((w[i] ~ /^diverge/)||(w[i] ~ /^crossfire/)||(w[i] ~ /^crossection/))
	{
	    word = "DIVERGENCE"
	    DIVER = " "
	}
	if((w[i] ~ /^polar/)||(w[i] ~ /^monochro/)||(w[i] ~ /^graphi/)||(w[i] ~ /^mirror/)||(w[i] ~ /^pinhole/))
	{
	    # hadle word-based arguments here
	    temp = ""
	    if(w[i] ~ /^monochro/) temp = "MONOCHROMATOR"
	    if(w[i] ~ /^graphi/)   temp = "MONOCHROMATOR"
	    if(w[i] ~ /^mirror/)   temp = "MIRRORS"
	    if(w[i] ~ /^pinhole/)  temp = "MIRRORS"

	    POLAR = POLAR " " temp
	    if(temp == "") word = "POLAR"
	}

	
	# Detector proterties
	
	# mentioning detector
	if((w[i] ~ /^detect/))
	{
	    word = "DETECTOR"
	    DETECTOR = " "
	}
	# distance
	if((w[i] ~ /^dist/)||(w[i] ~ /^xtf/))
	{
	    word = "DISTANCE"
	}
	# beam center
	if((w[i] == "direct")||(w[i] == "beam")||(w[i] == "center"))
	{
	    word = "CENTER"
	    if(w[i] != "beam") CENTER = " "
	}
	# 2theta
	if((w[i] ~ /^sw[iu]ng/)||(w[i] ~ /^twothe/)||(w[i] ~ /theta/))
	{
	    word = "TWOTHETA"
	    TWOTHETA = " "
	}
	# quantum gain
	if((w[i] ~ /^gain/)||(w[i] ~ /^quantum/)||(w[i] ~ /^yeild/))
	{
	    word = "GAIN"
	}
	# starting phi
	if((w[i] ~ /^phi/)||(w[i] ~ /^start/))
	{
	    word = "PHI"
	    if(w[i] ~ /^phi/) PHI = " "
	}
	# oscillation
	if((w[i] ~ /^osc/)||(w[i] ~ /^step/)||(w[i] ~ /^delta/)||(w[i] == "angle"))
	{
	    word = "OSC"
	    if((w[i] ~ /^osc/)) OSC = " "
	}
	
	
	# hard-to-determine program options
	if((w[i] ~ /^sdcorr/))
	{
	    # scala's SDCORRECTION card
	    word = "SDCORR"
	    SDCORR = " "
	}
	
	# (potentially) complex logic flags
	if(w[i] == "fix")
	{
	    word = "FIX"
	}
	if((w[i] == "fit")||(w[i] == "unfix")||(w[i] == "free"))
	{
	    word = "NEG FIX"
	}
	if((w[i] == "not")||(w[i] == "no")||(w[i] == "stop")||(w[i] == "wrong")||(w[i] == "cease"))
	{
	    word = "NEG"
	}
	# handle apostraphe in don't can't shouldn't, etc.
	if((w[i-1] ~ /n\$/)&&(w[i] == "'")&&(w[i+1] == "t"))
	{
	    word = "NEG"
	}
	if((w[i] == "off")||(w[i] == "wrong"))
	{
	    word = "NEG-"
	}

	line = line " " word
    }

    nf = split(line, w)
    line = ""
    

    ###########################################################
    #
    #	assign values to variables (cell and SG)
    #
    ###########################################################
    # look for unit cells (six, consecutive numbers)
    # Pick out space group symbols and a complete unit cell from w[1..nf];
    # every word that is not part of a cell is re-assembled into "line".
    for(i=1;i<=nf;++i)
    {
	# look for space groups
	if((w[i] ~ /^[PpCcIiFfRrHh][1-6]/)&&(w[i-1] != "NEG"))
	{
	    SG = SG " " toupper(substr(w[i],1,1)) substr(w[i],2)+0
	    # don't confuse this with a number
	    w[i] = ""
	}	

	# look for pattern of six, consecutive numbers (could only be a unit cell):
	# all six must be > 5, the first three < 1000 and the last three < 175
	iscell = 0
	if((w[i]+0 > 5)&&(w[i+1]+0 > 5)&&(w[i+2]+0 > 5)&&(w[i+3]+0 > 5)&&(w[i+4]+0 > 5)&&(w[i+5]+0 > 5))
	{
	    if((w[i]+0 < 1000)&&(w[i+1]+0 < 1000)&&(w[i+2]+0 < 1000)&&(w[i+3]+0 < 175)&&(w[i+4]+0 < 175)&&(w[i+5]+0 < 175))
	    {
		iscell = 1
	    }
	}

	if(iscell)
	{
	    CELL = w[i]+0 " " w[i+1]+0 " " w[i+2]+0 " " w[i+3]+0 " " w[i+4]+0 " " w[i+5]+0
	 
	    # these numbers don't mean anything else
	    i += 5
	}
	else
	{
	    # reassemble non-cell words
	    # (this includes a number that starts six numbers > 5 which fail
	    #  the range test, so that no word is lost)
	    line = line " " w[i]
	}
    }
    
    
    if(debug) print "3: " line

    nf = split(line, w)
    line = ""
    
    ###########################################################
    #
    #	assign likely values to keyworded variables
    #
    ###########################################################
    # now go through each KEY word, and see if we can find a value for it
    for(i=1;i<=nf;++i)
    {
	# fix/unfix parameters
	if((w[i] == "FIX"))
	{
	    # fix/unfix logic
	    FIX = FIX " " w[i+1]
	
	    # next keyword/value gets cancelled
#	    w[i+1] = ""
	}
    
	# negation cancels next word,  wether it be a number or a word
	if((w[i-1] == "NEG")||(w[i+1] == "NEG-"))
	{
	    # fix/unfix logic
	    if(w[i] == "FIX")
	    {
		FIX = "NOT " FIX
	    }
	
	    # keyword/value gets cancelled
	    w[i] = ""
	}
    
	# size of the asymmetric unit
	if(w[i] == "ASU")
	{
	    num = ""
	    # check for valid numbers next to this keyword
	    if((w[i-1] ~ /^[0-9]/)&&(w[i-1]+0 >= 1))
	    {
		num = w[i-1]+0
		
		# convert/interpret range
		if((num+0 < 100)&&(num+0 == int(num)))
		{
		    ASU = num
		    # numbers are used only once
		    w[i-1] = ""
		    w[i] = ""
		}
		# could be a straightforward mass
		if(num+0 > 1000)
		{
		    MASS = MASS " " num
#		    ASU = 1
		    
		    # numbers are used only once
		    w[i-1] = ""
		    w[i] = ""
		}
	    }
	}
	
	if(w[i] == "SOLVENT")
	{
	    num = ""
	    # check for valid numbers next to this keyword
	    if((w[i-1]+0 > 0.05)&&(w[i-1]+0 < 0.95))
	    {
		num = w[i-1]+0
		w[i-1] = ""
		w[i] = ""
	    }
	    else
	    {
		if((w[i+1]+0 > 0.05)&&(w[i+1]+0 < 0.95))
		{
		    num = w[i+1]+0
		    w[i+1] = ""
		    w[i] = ""
		}
	    }
	    if((num > 0.05)&&(num < 0.95))
	    {
		# compute Vm from solvent content
		VM = VM " " 1.24/(1 - num)
	    }
	}
	
	# metal sites
	if(w[i] == "SITES")
	{
	    num = w[i-1]+0
	    # check for valid numbers before this keyword
	    if((num > 0)&&(num < 200)&&(num == int(num))||w[i-1]=="0")
	    {
		SITES = SITES " " num
		w[i-1] = ""
		w[i] = ""
	    }
	    else
	    {
		num = w[i+1]+0
		if((num > 0)&&(num < 200)&&(num == int(num))||w[i+1]=="0")
		{
		    SITES = SITES " " num
		    w[i+1] = ""
		    w[i] = ""
		}
	    }
	}
	
	# Oscillation angle
	if(w[i] == "OSC")
	{
	    # check for valid numbers before this keyword
	    if((w[i-1]+0 > 0)&&(w[i-1]+0 < 30))
	    {
		OSC = OSC " " w[i-1]+0
		w[i-1] = ""
		w[i] = ""
	    }
	    else
	    {
		# maybe after
		if((w[i+1]+0 > 0)&&(w[i+1]+0 < 30))
		{
		    OSC = OSC " " w[i+1]+0
		    w[i+1] = ""
		    w[i] = ""
		}
	    }
	}
	
    }
    
    if(debug) printf "%s",  "4: "

    # prefix keywords
    for(i=1;i<=nf;++i)
    {
    if(debug) printf " %s", w[i]
    
	########################
	# crystal properties
	
	# unit cell mentioned
	if((w[i] == "CELL")&&(CELL == ""))
	{
	    CELL = CELL " "

	    # check for partial cell?
	    for(j=0; j<=6; ++j)
	    {
		if((w[i+j]+0 >5)&&(w[i+j]+0 < 1000))
		{
		    if((w[i+j]+0 < 175)||(j < 3))
		    {
			# assemble numbers that might be incomplete cell dimensions
			CELL = CELL " " w[i+j]+0
			w[i+j] = ""
		    }
		}
	    }
	}
	
#	# outer resolution limit
#	if(w[i] == "RESO")
#	{
#	    # check for valid numbers next to this keyword
#	    if((w[i+1]+0 > 0.1)&&(w[i+1]+0 <= 10))
#	    {
#		RESO = RESO " " w[i+1]+0
#		w[i+1] = ""
#	    }
#	}
	
	# resolution limits
	if(w[i] == "RESO")
	{
	    # check for valid numbers next to this keyword
	    if((w[i+1]+0 > 0.1)&&(w[i+1]+0 <= 10))
	    {
		RESO = RESO " " w[i+1]+0
		w[i+1] = ""
		
		# perhaps a lo-res limit?
		if((w[i+2]+0 > 10)&&(w[i+2]+0 <= 1000))
		{
		    loRESO = loRESO " " w[i+2]+0
		    w[i+2] = ""
		    # cancel mentioning hi-res
		    if(RESO+0 == 0) RESO = ""
		}
	    }
	    # check for valid numbers next to this keyword
	    if((w[i+1]+0 > 10)&&(w[i+1]+0 <= 1000))
	    {
		loRESO = loRESO " " w[i+1]+0
		w[i+1] = ""
		# cancel mentioning hi-res
		if(RESO+0 == 0) RESO = ""
	    }
	}
	
	# mosaic spread
	if(w[i] == "MOSAIC")
	{
	    MOSAIC = MOSAIC " " 
	    # check for valid numbers next to this keyword
	    if((w[i+1]+0 > 0.01)&&(w[i+1]+0 <= 10))
	    {
		MOSAIC = MOSAIC w[i+1]+0
		w[i+1] = ""
	    }
	}
	
	# size of the asymmetric unit (number given AFTER the keyword)
	if(w[i] == "ASU")
	{
	    num = ""
	    # check for valid numbers next to this keyword
	    if((w[i+1] ~ /^[0-9]/)&&(w[i+1]+0 >= 1))
	    {
		# look AFTER keyword
		num = w[i+1]+0

		# a whole number from 1 to 99 is taken as a count
		if((num+0 >= 1)&&(num+0 < 100)&&(num+0 == int(num)))
		{
		    ASU = num
		    
		    # numbers are used only once
		    w[i+1] = ""
		}
		# could be a straightforward mass
		if(num+0 > 1000)
		{
		    MASS = MASS " " num
		    ASU = 1
		    
		    # numbers are used only once: the value came from the word
		    # after the keyword, so that is the one to blank out
		    w[i+1] = ""
		}
	    }
	}
	
	# molecular weight of the protein
	if(w[i] == "MASS")
	{
	    num = ""
	    # check for valid numbers next to this keyword
	    if((w[i+1] ~ /^[0-9]/)&&(w[i+1]+0 > 1))
	    {
		num = w[i+1]+0
		
		# convert/interpret range
		if(num < 1) num = 0
		
		# assume < 1000 means kD
		if(num < 1000) num *= 1000
		
		if(num+0 > 1000) 
		{
		    MASS = MASS " " num
		    
		    # numbers are used only once
		    w[i+1] = ""
		}
	    }
	}
	
	# solvent content
	if(w[i] == "VM")
	{
	    # check for valid numbers next to this keyword
	    if((w[i+1]+0 > 1)&&(w[i+1]+0 <= 10))
	    {
		VM = VM " " w[i+1]
		w[i+1] = ""
	    }
	}
	
	########################
	# X-ray properties
	
	# Wavelength
	if(w[i] == "WAVE")
	{
	    num = ""
	    # check for valid numbers next to this keyword
	    if((w[i+1]+0 > 0.1)&&(w[i+1]+0 < 30000))
	    {
		num = w[i+1]+0
		
		# assume > 10 just means energy
		if(num > 10)
		{
		    # assume < 1000 means keV
		    if(num < 1000) num *= 1000
		    num = 12398.4245 / num
		}
		    
		if((num > 0.1)&&(num < 10))
		{
		    WAVE = WAVE " " num
		    
		    # numbers are used only once
		    w[i+1] = ""
		}
	    }
	}
	if(w[i] == "ENERGY")
	{
	    num = ""
	    # check for valid numbers next to this keyword
	    if((w[i+1]+0 > 3)&&(w[i+1]+0 < 30000))
	    {
		num = w[i+1]+0
		
		# assume < 1000 means keV
		if(num < 1000) num *= 1000
		num = 12398.4245 / num
		
		if((num > 0.1)&&(num < 10))
		{
		    WAVE = WAVE " " num
		    
		    # numbers are used only once
		    w[i+1] = ""
		}
	    }
	}
	
	# Scanner gain
	if(w[i] == "GAIN")
	{
	    GAIN = GAIN " "
	    # check for valid numbers next to this keyword
	    if((w[i+1]+0 > 0.0001)&&(w[i+1]+0 < 1000000))
	    {
		GAIN = GAIN w[i+1]+0
		w[i+1] = ""
	    }
	}
	
	# beam polarization
	if(w[i] == "POLAR")
	{
	    # check for actual numbers next to this keyword (word arguments hanlded above)
	    if((w[i+1] ~ /[0-9]/)&&(w[i+1]+0 > -1)&&(w[i+1]+0 < 1))
	    {
		POLAR = POLAR " " w[i+1]+0
		w[i+1] = ""
	    }
	}
	
	# spectral dispersion (bandwidth)
	if(w[i] == "DISPERSION")
	{
	    # check for valid numbers next to this keyword
	    if(((w[i+1]>0)&&(w[i+1]+0 < 0.01))||(w[i+1]+0 > 100))
	    {
		num = w[i+1]+0
		
		# assume > 1 means bandwidth
		if(num > 1) num = 1/num
		
		if((num > 0)&&(num < 0.01))
		{
		    DISPER = DISPER " " w[i+1]+0
		    w[i+1] = ""
		}
	    }
	}
	
	# beam divergence (degrees)
	if(w[i] == "DIVERGENCE")
	{
	    # check for valid numbers next to this keyword
	    if((w[i+1] ~ /[0-9]/)&&(w[i+1]+0 >= 0)&&(w[i+1]+0 < 2))
	    {
		DIVER = DIVER " " w[i+1]+0
		w[i+1] = ""
		
		# possible vertical divergence too?
		if((w[i+2] ~ /[0-9]/)&&(w[i+2]+0 >= 0)&&(w[i+2]+0 < 2))
		{
		    DIVER = DIVER " " w[i+2]+0
		    w[i+2] = ""
		}
	    }
	}
	
	
	
	########################
	# Collection strategy
	
	# Starting phi angle
	if(w[i] == "PHI")
	{
	    # check for valid numbers next to this keyword
	    if((w[i+1] ~ /[0-9]/)&&(w[i+1]+0 > -360)&&(w[i+1]+0 < 720))
	    {
		PHI = PHI " " w[i+1]+0
		w[i+1] = ""
	    }
	}
	
	# XTF distance
	if(w[i] == "DISTANCE")
	{
	    DIST = DIST " " 
	    # check for valid numbers next to this keyword
	    if((w[i+1]+0 > 5)&&(w[i+1]+0 < 10000))
	    {
		DIST = DIST w[i+1]+0
		w[i+1] = ""
	    }
	}
	
	# beam center on detector face
	if(w[i] == "CENTER")
	{
	    # check for TWO valid numbers next to this keyword
	    if((w[i+1] ~ /[0-9]/)&&(w[i+1]+0 > -200))
	    {
		if((w[i+2] ~ /[0-9]/)&&(w[i+2]+0 > -200))
		{
		    CENTER = " " w[i+1]+0 " " w[i+2]+0
		    w[i+1] = ""
		    w[i+2] = ""
		}
	    }
	}
	
	# twotheta (detector swing) angle
	if(w[i] == "TWOTHETA")
	{
	    # check for actual numbers next to this keyword (word arguments hanlded above)
	    if((w[i+1] ~ /[0-9]/)&&(w[i+1]+0 > -120)&&(w[i+1]+0 < 120))
	    {
		TWOTHETA = TWOTHETA " " w[i+1]+0
		w[i+1] = ""
	    }
	}
	
	
	
	# scala's SDCORRection card
	if(w[i] == "SDCORR")
	{
	    # check for valid numbers next to this keyword
	    if((w[i+1]+0 > 0.9)&&(w[i+1]+0 < 5))
	    {
		# must be at least two numbers
		if((w[i+2] ~ /[0-9]/)&&(w[i+2]+0 >= 0)&&(w[i+2]+0 < 10))
		{
		    if((w[i+2]+0 < 0.5)&&((w[i+3] !~ /[0-9]/)||(w[i+3]+0 > 0.5)))
		    {
			# just these two
			SDCORR = w[i+1]+0 " " w[i+2]+0
			w[i+1] = ""
			w[i+2] = ""
		    }
		    if((w[i+3] ~ /[0-9]/)&&(w[i+3]+0 >= 0)&&(w[i+3]+0 < 0.5))
		    {
			# all three
			SDCORR = w[i+1]+0 " " w[i+2]+0 " " w[i+3]+0
			w[i+1] = ""
			w[i+2] = ""
			w[i+3] = ""
		    }
		}
	    }
	}
    }

    if(debug) print ""
    ###########################################################
    #
    #	assign UNlikely values to keyworded variables
    #	    ("wrong" side of uninitialized keyword)
    #
    ###########################################################
    # not done



#    ###########################################################
#    #
#    #	Look for outright filenames
#    #
#    ###########################################################
#    line = ""
#    # remove punctuation
#    for(i=1;i<=length(\$0);++i)
#    {
#	c = substr(\$0,i,1);
#	
#	# separate stuff from puctuation
#	if(c !~ /[a-zA-Z0-9._\\/\\-]/) c = " "
#	if(c ~ /[\\"\\'\\\`,;:]/) c = " "
#	if((c ~ /./) && (substr(\$0,i+1,1) !~ /[0-9]/)) c = " " c
#	if(c ~ /[,;:]/) c = " " c " "
#	
#	line = line c
#    }
#
#    nf = split(line, w)
#    line = ""
#
#    for(i=1;i<=nf;++i)
#    {
#	if((w[i-1] !~ /^[Nn][Oo]/)&&(w[i-2] !~ /^[Nn][Oo]/))
#	{
#	    if(! system("test -d " w[i] ))
#	    {
#		DIR = DIR " " w[i]
#	    }
#	    else
#	    {
#		if(! system("test -r " w[i] ))
#		{
#		    FILE = FILE " " w[i]
#		    
#		    if(! system("test -x " w[i] ))
#		    {
#			PROG = PROG " " w[i]
#		    }
#		}
#	    }
#	}
#    }
}


# Report: one line per parameter that was mentioned anywhere in the input.
# A variable that was only "mentioned" holds just blanks, so its keyword is
# printed without a value.
END{
    # give ASU in Daltons (once we know mass)
#    if((MASS)&&(! ASU)) ASU = " "
#    {
#	temp = ASU
#	if(ASU+0 == 0) temp = 1
#	if((MASS+0 > 1000)&&(ASU+0 < 100)) ASU = temp * MASS
#	if(ASU < MASS) ASU = MASS
#    }

    # input range: the usual
    if(SG)       print "SYMM   ", SG
    # input range: six, CONSECUTIVE numbers > 5
    if(CELL)     print "CELL     ", CELL
    # input range: 1->
    if(MASS)     print "MASS   ", MASS
    # input range: 1 - 5
    if(ASU)      print "ASU    ", ASU
    # input range: 1 - 5
    if(VM)       print "VM     ", VM
    # input range: integers
    if(SITES)    print "SITES  ", SITES
    # input range: 0.1 - 10
    if(RESO)     print "RESO   ", RESO
    # input range: 0.1 - 10
    if(loRESO)   print "loRESO ", loRESO
    # input range: 0.01 < mosaic < 10
    if(MOSAIC)   print "MOSAIC ", MOSAIC
    # input range: 0.1 -> 30000 (ang, eV, keV)
    if(WAVE)     print "WAVE   ", WAVE
    # input range: 0 < gain 
    if(GAIN)     print "GAIN   ", GAIN
    # input range: -1 < polar < 1
    if(POLAR)    print "POLAR  ", POLAR
    # input range: 0< disper < 0.01 or > 100
    if(DISPER)   print "DISPER  ", DISPER
    # input range: 0 < 2 (1-2 values)
    if(DIVER)    print "DIVER   ", DIVER
    # input range: anything
    if(PHI)      print "PHI     ", PHI
    # input range: osc > 0
    if(OSC)      print "OSC     ", OSC
    # input range: dist > 0
    if(DIST)     print "DIST    ", DIST
    # input range: two numbers
    if(CENTER)   print "CENTER  ", CENTER
    # input range: -180 -> 180
    if(TWOTHETA) print "TWOTHETA ", TWOTHETA
    # input range: word logic
    if(SDCORR)   print "SDCORR  ", SDCORR
    # input range: word logic
    if(FIX)      print "FIX     ", FIX
    # NOTE(review): the code that would fill in FILE, DIR and PROG is commented
    # out in the main rule, and nothing visible in this program assigns
    # TEMPLATE or REST; presumably these only appear if set from outside
    # input range: word logic
    if(FILE)     print "FILE    ", FILE
    # input range: word logic
    if(TEMPLATE) print "TEMPLATE ", TEMPLATE
    # input range: word logic
    if(DIR)      print "DIR     ", DIR
    # input range: word logic
    if(PROG)     print "PROG    ", PROG

    # whatever is left
    if(REST)     print "REST    ", REST}
    
EOF-ginger
chmod a+x ${tempfile}ginger.awk




cat << EOF-labler >! ${tempfile}labler.awk
#! $nawk -f
#
#   Find a unique label for the set of items ending lines
#   among the words found on those lines
#
#
{
    # the last word of each line is the item to be labelled;
    # whenever it changes from the previous line a new group begins
    if(Wave[w] != \$NF) w++
    Wave[w] = wave[NR] = \$NF

    # every word before the last one is a candidate label for this group
    i = 1
    while(i < NF)
    {
	words[w] = words[w] " " \$i
	++i
    }
}

# Choose one label per group and print: last-word  group-number  label
END{
    mtzs = NR
    waves = w

    # find the best label for each wavelength
    for(w=1;w<=waves;++w)
    {
	# search for unique label in words provided with WL
	NF=split(words[w], word)
	for(candidate=1;candidate<=NF;++candidate)
	{
	    # find a label for this wavelength
	    # (once a candidate survives the check below, later candidates are ignored)
	    if(label[w] == "")
	    {
		# no label for this wavelength yet
		label[w] = word[candidate]
		
		# check and make sure this word is unique
		for(o=1;o<=waves;++o)
		{
		    # look through all other wavelengths
		    if(o != w)
		    {
			NOF=split(words[o], oword)
			for(other=1;other<=NOF;++other)
			{
			    # the same word appears elsewhere: reject this candidate
			    if(label[w] == oword[other]) label[w] = ""
			}
		    }
		}
	    }
	}
	# safety catch
	# (no unique word was found: fall back to the group number)
	if(label[w] == "") label[w] = w
    }

    # "shave" common characters
    # as long as every label starts with the same character, and that
    # character is not a digit, remove the first character from all labels
    while(! done && waves)
    {
	for(w=1;w<=waves;++w)
	{
	    # count the labels that share their first character with label w
	    shave = 0
	    for(o=1;o<=waves;++o)
	    {
		if((substr(label[w],1,1) == substr(label[o],1,1))&&(label[o] !~ /^[0-9]/))
		{
		    ++shave
		}
	    }

	    # only shave when ALL labels agree, and there is more than one of them
	    if((shave == waves)&&(shave > 1))
	    {
		for(s=1;s<=waves;++s)
		{
		    label[s] = substr(label[s], 2)
		}
	    }
	    else
	    {
		done =1
	    }
	}
	# hard limit on the number of passes, in case "done" is never set
	++catch
	if(catch > 1000) done =1
    }
    # make every label begin with a capital F, then print the result
    for(w=1;w<=waves;++w)
    {
	if(label[w] ~ /^f/) label[w] = "F" substr(label[w], 2)
	if(label[w] !~ /^F/) label[w] = "F" label[w]
	print Wave[w], w, label[w]
    }
}
EOF-labler
chmod a+x ${tempfile}labler.awk






cat << EOF-x2york >! ${tempfile}x2york.awk
#! $nawk -f
#
#	Tries to convert standard denzo .x files to "york" format, so
#	programs other than scalepack can read them.
#
#	Files already in york format pass through unharmed.
#
#	Non-denzo files (no unitary matrix in header) are blocked
#
#	.x files are missing:
#	- a "HEADER" record, which provides rotaprep with a batch number
#	- fractional partiality and phi value of spot center
#	- they also split the 5th york line into two parts
#
BEGIN{
    # start the running minima high so that the first spot replaces them
    # (the matching maxima are left unset and so start out as zero)
    minX = 9999999
    minY = 9999999
    
    # "add" is added to every batch number printed on a HEADER line;
    # it is zero unless it already has a value when the program starts
    if(! add) add = 0;
    
    reading = 1
}

# read in ALL lines to an array
# (runs for every input line; n counts the lines buffered for the current file,
#  and the pattern rules further down adjust n and lastline afterwards)
{
    ++n
    line[n] = \$0
    
    # recognize format of camera info line
    # (four numbers that re-print exactly as four 12.5f columns;
    #  the first two are used below as the phi range of the image)
    if(substr(\$0,1,48) == sprintf("%12.5f%12.5f%12.5f%12.5f", \$1, \$2, \$3, \$4))
    {
	start = \$1
	end   = \$2
    }
    
    # reformat long lines (5th line)
    # (a purely numeric line longer than 80 characters is stored as two
    #  lines, split after column 48; skipped once a HEADER line has been seen)
    if((length(\$0) > 80)&&(! york)&&(! /[^0-9 \.-]/))
    {
	line[n] = substr(\$0, 1, 48)
	++n
	line[n] = substr(\$0, 49)
    }
    
    # reformat spot entries
    # (recognized by length and by a digit, a blank and then 0 or 1 in columns 12-14)
    if((n >5) && (length(\$0) >= 77) && (substr(\$0, 12, 3) ~ /[0-9] [01]/))
    {
	# exactly 77 characters: the two trailing values are missing
	if((length == 77)&&(! york))
	{
	    # make-up missing data
	    if(substr(\$0, 13, 2) == " 0")
	    {
		fractioncalc = 1
	    }
	    else
	    {
		# do something creative here? 
		fractioncalc = 0.001
	    }
	    # use the middle of the phi range read from the camera info line
	    phispot = (start + end)/2
	    warn = "yes"
	    
	    # append these values to the line
	    line[n] = line[n] sprintf(" %5.3f%6.1f", fractioncalc, phispot)
	}

	# eliminate negative sigmas (rotaprep will remove them anyway)
	# (only when "clean" is set; also drops spots with I > 999999 or I < 3*sigma)
	if(clean)
	{
	    sigma = substr(\$0, 38)+0
	    I = substr(\$0, 15, 8)+0
	    if((sigma <= 0)||(I > 999999)||(I < 3*sigma))
	    {
		# next line will overwrite this one
		--n
	    }
	}
	
	# collect stats on spots
	# (running minimum and maximum of the numbers in columns 50-56 and 57-63)
	X = substr(\$0,50,7)+0
	Y = substr(\$0,57,7)+0
	
	if(X < minX) minX=X
	if(Y < minY) minY=Y
	if(X > maxX) maxX=X
	if(Y > maxY) maxY=Y
    }
    
    # get batch number from here
    # (last word of the first "sector" line seen for the current page)
    if(\$0 ~ /^sector / )
    {
	# NOTE(review): this uses "batch_add" while the HEADER lines are printed
	# with "add"; nothing visible here sets batch_add - confirm it is intended
	if(! batch[page]) batch[page] = \$NF + batch_add

	# look for batch overlaps?
    }
}

# telltale signs of end-of-xfile (avoid random junk)
# "lastline" is the last buffered line that will be printed for the current
# file; each rule below sets it only if it is still unset (less than 10)
/^crossfire/ {
    # the crossfire line itself is the last line to keep
    if(lastline < 10) lastline = n
}

/^HEADER/ {
    # a HEADER line ends the previous file at the line before it, and
    # shows that the input is already in york format
    if((lastline < 10)&&(n > 10)) lastline = n-1
    york = "yes"
    
    # back up over this header line
    # (so that the next line stored overwrites it)
    --n
}

FILENAME != oldFILENAME {
    # first line of a new input file: the previous file ended on the line before
    if(lastline < 10) lastline = n-1

    oldFILENAME = FILENAME
}

# look for orientation matrix entry
# (a line that re-prints exactly as three 15.8f and three 10.6f columns;
#  the last three columns of three such lines form the matrix tested below)
\$0 == sprintf("%15.8f%15.8f%15.8f%10.6f%10.6f%10.6f", \$1, \$2, \$3, \$4, \$5, \$6) {
    # matrix entry shall mark new denzo batch
    # (determinant still holds the result for the PREVIOUS matrix at this point)
    if(determinant == "1.0000")
    {
	# then one file has already been read
    
	# print out the header line
	printf "HEADER %5d\\n", batch[page]+add
	
	# don't print out title (can screw up batch numbers)
#	line[1]=""	
    
	# dump lines for previous x file
	# (if no end marker was seen, stop two lines back: the current line
	#  and the one before it already belong to the new file)
	if(lastline < 10) lastline = n-2
	for(i=1;i<=lastline;++i)
	{
	    print line[i]
	}
	# update lines for current file
	# (restart the buffer with the line before this one, then this line)
	line[1] = line[n-1]
	line[2] = \$0
	lastline = 0

	determinant = "undefined"
	++page
	n=2
    }

    # keep track of matrix row count
    ++matrix_rows
    
    # run down columns, initializing matrix
    # (columns 4 to 6 of this line become row "matrix_rows")
    for(j=1;j<=3;++j)
    {
	mat[matrix_rows,j]=\$(j+3)+0
    }
    
    # calculate determinant of (putative) unitary matrix, when we have it all
    if(matrix_rows == 3)
    {
	# expansion along the first row
	determinant = 0
	determinant += mat[1,1]*((mat[2,2]*mat[3,3])-(mat[2,3]*mat[3,2]));
	determinant -= mat[1,2]*((mat[2,1]*mat[3,3])-(mat[2,3]*mat[3,1]));
	determinant += mat[1,3]*((mat[2,1]*mat[3,2])-(mat[2,2]*mat[3,1]));
	# stored as text rounded to 4 places, so it can be compared with "1.0000"
	determinant = sprintf("%.4f", determinant)
	matrix_rows = 0
    }
}



# Flush the last buffered file, then report a warning (if any values had to
# be made up) and the range of the spot positions that were seen.
END {
    # last one
    
    # only dump the buffer if the last matrix read had a determinant of 1
    if(determinant == "1.0000") 
    {
	# add header line
	printf "HEADER %5d\\n", batch[page]+add

	# don't print out title (can screw up batch numbers)
#	line[1]=""	
    
	# dump all lines
	for(i=1;i<=n;++i)
	{
	    print line[i]
	}
    }
    print ""
    if(warn)
    {
	# the quotes around the word must be escaped: left bare, they end the
	# string and the word is read as an (empty) variable
	print "WARNING: some \\"fraction\\" numbers had to be filled-in"
    }
    # widen the observed range by one on each side and truncate
    minX=int(minX-1)
    minY=int(minY-1)
    maxX=int(maxX+1)
    maxY=int(maxY+1)
    printf "X_range: %d %d Y_range: %d %d\\n", minX, maxX, minY, maxY
}

EOF-x2york
chmod a+x ${tempfile}x2york.awk







cat << EOF-parser >! ${tempfile}parser.awk
#! $nawk -f
#
#   Create a list of unique and non-overlapping batch numbers from a list
#   of:
#   wave, file, batch, phi, ending phi
#
#   like this:
#   1.54 raw.mtz 1  0 1.5 ...
#
#   step 1: identify runs of batches
#   step 2: link runs contiguous in phi & batch from separate input files (same wavelength)
#   step 3: renumber frames to avoid run collisions
#   
#	We will try to put the wave identifier in the ten-thousands place, and the
#	run identifier in the thousands place, like this:
#	| second lowest wavelength
#	23022 <- batch 22
#	 | third run (in this wavelength)
#
#	Since we cannot exceed 99999 as a batch number, if there are more than
#	9 wavelengths, the thousands place will simply count the runs.
#	If you have > 99 runs, batches will be renumbered sequentially.
#
#   parsing is fastest with input sorted on wavelength, sourcefile and then on batch :
#   sort +0n -1 +1 -2 +2n -3 
#   output is a list of runs and batch-adding procedures
#
#
#   printout look like this:
#   # wavelength 1 : 1 A = 12398.4 eV
#   # 11000 +     1 to    15 in filea
#   # 11000 +    16 to    30 in fileb
#   run     1 11001 to 11030
#   # 12000 +    32 to    46 in filec
#   run     2 12032 to 12046
#
#
# read in "batches file"
# (only lines with at least five words are used; i counts the lines kept)
NF >= 5 {
    ++i
    
    # line should consist of: wave file batch phistart phiend etc...
    Line[i]	= \$0
    Wave[i]     = \$1+0
    File[i]     = \$2
    Batch[i]    = \$3+0
    Phi[i]      = \$4+0
    Endphi[i]   = \$5+0

    # Rest[i] is the line from its 4th word onward.
    # Step over the first three words one at a time, so that an earlier word
    # containing the same text as the 4th one (batch 1 and phi 1, for
    # example) is not mistaken for it.
    rest_at = 1
    for(rest_fld=1; rest_fld<=3; ++rest_fld)
    {
	# move to just past this word
	rest_at += index(substr(\$0, rest_at), \$rest_fld) + length(\$rest_fld) - 1
    }
    Rest[i]     = substr(\$0, rest_at + index(substr(\$0, rest_at), \$4) - 1);

    # Wave must be non-zero (to avoid dividing by zero)
    # (inherit the value of the previous line if there is one, else use 1)
    if(Wave[i] == 0)
    {
	Wave[i] = 1;
	if(Wave[i-1]+0 != 0) Wave[i] = Wave[i-1]
    }

    # keep track of wavelengths that have passed by
    if(! taken["wave " Wave[i]])
    {
	# this is the beginning of a new wavelength
	taken["wave " Wave[i]] = 1;
	++waves;
	Lambda[waves] = Wave[i];
    }
    
}

END{
    # Runs once, after every input record has been stored by the rule above.
    #
    # Inputs (filled in per record): Line[], Wave[], File[], Batch[], Phi[],
    #   Endphi[] indexed 1..i, plus Lambda[1..waves] and the taken[] registry.
    #
    # Work done here:
    #   - rank the wavelengths (wavenumber[])
    #   - chain frames into runs (before[], after[], start[], end[], Run_number[])
    #   - choose an offset add[run] for every run using plan A, B, C or D
    #   - print the run table and a per-frame dump of the new batch numbers
    N = i;
    
    # sort wavelengths (stupid, n^2 sort, but n is small)
    # wavenumber[lambda] = how many registered wavelengths are >= lambda,
    # so the largest wavelength is ranked 1 and the smallest is ranked "waves".
    # NOTE(review): the header example speaks of wave 2 as the "second lowest
    # wavelength"; confirm which ordering is actually intended.
    for(wave=1; wave<=waves; ++wave)
    {
	for(other=1; other<=waves; ++other)
	{
	    if(Lambda[wave] <= Lambda[other]) ++wavenumber[Lambda[wave]];
	}
    }
    # reassign wave numbers (Lambda[] becomes indexed by rank)
    for(lambda in wavenumber)
    {
	Lambda[wavenumber[lambda]] = lambda;
    }
    
    
    
        
    # phase 1: identify adjacent frames (restricted to same source file)
    
    # assign a wedge to each and every frame
    for(i=1;i<=N;++i)
    {
	# now run through all other frames, looking for the adjacent frame to i
	for(k=1; k<=N; ++k)
	{
	    # wraparound index (for speed): starts at i+1, wraps, and ends at i,
	    # so with sorted input the successor is normally found at k=1
	    j = (i+k-1)%N +1;
	    
	    # adjacent frames have same wavelength, adjacent batch and phi, and same source file.
	    # before[j] must still be empty so a frame gets at most one predecessor
	    if((Batch[i]+1 == Batch[j])&&\\
	       (Endphi[i]==Phi[j])&&\\
	       (File[i] == File[j])&&\\
	       (Wave[i] == Wave[j])&&\\
	       (before[j] == ""))
	    {
		# assign frame "j" to be after frame "i"
		 after[i] = j;
		before[j] = i;
		
		# we're done with frame i
		break;
	    }
	}
    }
    
    
    


    # phase 2: connect same-wl wedges that are consecutive in phi and batch number
    #          (regardless of source file)
    for(i=1;i<=N;++i)
    {
	# only frames that are still the tail of a wedge need a partner
	if(after[i] == "")
	{
	    for(k=1; k<=N; ++k)
	    {
		# wraparound index (for speed)
		j = (i+k-1)%N +1;
	    
		# just look at dangling ends this time
		if((after[i]=="")&&(before[j]=="")&&\\
		    (Batch[i]+1 == Batch[j])&&\\
		    (Endphi[i]==Phi[j])&&\\
		    (Wave[i] == Wave[j]))
		{
		    after[i] = j;
		    before[j] = i;
		    
		    break;
		}
	    }
	}
    }
    
    
    
    # phase 3: define runs
    # every chain of before[]/after[] links becomes one run; start[run] and
    # end[run] hold the frame indices of its first and last frame
    for(i=1;i<=N;++i)
    {
	# look for unassigned frames
	if(Run_number[i]=="")
	{
	    # count how many runs
	    ++run;
	    
	    # trace back to "beginning" of this run (ergodicity? yikes!)
	    # the test against i stops the walk if the links form a closed loop
	    start[run]=i;
	    while((before[start[run]]!="")&&\\
	          (before[start[run]] != i)&&\\
		  (Run_number[start[run]]==""))
	    {
		start[run] = before[start[run]];
	    }
	    # cut any remaining backward link so the run has a definite head
	    before[start[run]]="";
	    
	    # now trace to the "end" of this run (again, ergodicity?)
	    end[run] = start[run];
	    while((after[end[run]] != "")&&\\
	          (after[end[run]] != start[run])&&\\
		  (Run_number[after[end[run]]]==""))
	    {
		# claim this frame for this run
		Run_number[end[run]] = run;
		
		end[run] = after[end[run]];
	    }
	    # cut the forward link so later walks along after[] terminate here
	    after[end[run]]="";
	    Run_number[end[run]] = run;
	    
	    
	    # now that we've gotten this far (I hope)
	    # figure out which files are involved in this run
	    
#	    printf "run %4d  %4d in %4s to %4d in %s\\n", run, Batch[start[run]], File[start[run]], Batch[end[run]], File[end[run]]
	}
    }
    runs = run;
    
    
    ##########################################################################
    
    # phase 4: renumber batches in the prettiest way possible
    
    # gather information on batch layout, to see which renumbering scheme will work
    # each run should contain consecutive batches from start[] to end[]
    for(wave=1; wave <= waves; ++wave)
    {
	for(run=1; run <= runs; ++run)
	{
	    if(wave == wavenumber[Wave[start[run]]])
	    {
		# compute intra-wave run counter
		++Runs[wave];
		waverun[run] = Runs[wave];
		
		# make unique output run numbers (for scala)
		++outrun;
		Outrun[run] = outrun;
		
		# statistics on runs
		size[run] = Batch[end[run]] - Batch[start[run]] +1;
	    
		wave_size[wave] += size[run];	
		if(size[run] > maxsize[wave]) maxsize[wave] = size[run];
		if(Batch[end[run]] > maxBatch[wave]) maxBatch[wave] = Batch[end[run]];
	    }
	}
    }
    
    # do not exceed batch no. 99999
    # plan A needs single-digit wave and per-wave run counters
    plan = "A";
    if(waves > 9) plan = "B";
    for(wave=1; wave <= waves; ++wave)
    {
	if(Runs[wave] > 9)
	{
	    plan = "B"
	}
    }
    
    # more than 99 runs cannot fit a two-digit run identifier either.
    # The plan letter has to be a quoted string: a bare C is an uninitialized
    # awk variable (empty string), which matches none of the plans below and
    # would leave add[] and newbatch[] unset.
    if(runs > 99) plan = "C";
    
    # plan A	- nice, single digit indicators of wave and wedge
    if(plan == "A")
    {
	# nice, two-digit wave/wedge identifier
	for(run=1; run <= runs; ++run)
	{
	    wave = wavenumber[Wave[start[run]]]   
	    # the last term strips the thousands (and above) of the first batch
	    add[run] = 10000*wave + 1000*waverun[run] - 1000*int(Batch[start[run]]/1000);
	    
	    # index by scala run? 
#	    if(runs < 10) add[run] = 10000*wave + 1000*Outrun[run];
	}
	for(i=1; i <= N; ++i)
	{
	    newbatch[i] = Batch[i] + add[Run_number[i]];

	    # register batch number, to gaurentee no conflicts
	    if(taken[newbatch[i]]) {plan = "C"; break;};
	    taken[newbatch[i]] = 1;
	}
    }
    
    # plan B	- run-wise identifiers
    if(plan == "B")
    {
	# run counter in two-digit identifier
	outrun=0;
	for(wave=1; wave<=waves; ++wave)
	{
	    for(run=1; run <= runs; ++run)
	    {
		# select runs by wavelength rank, exactly as the other plans do.
		# Comparing the raw wavelength value with the loop counter only
		# matches by coincidence and leaves add[] unset for the run.
		if(wavenumber[Wave[start[run]]] == wave) 
		{
		    ++outrun;
		    add[run] = 1000*outrun - 1000*int(Batch[start[run]]/1000);
		}
	    }
	}
	for(i=1; i <= N; ++i)
	{
	    newbatch[i] = Batch[i] + add[Run_number[i]];

	    # register batch number, to gaurentee no conflicts
	    if(taken[newbatch[i]]) {plan = "C"; break;};
	    taken[newbatch[i]] = 1;
	}
    }
    
    # plan C	- last attempt to preserve batch numbers
    if(plan == "C")
    {
	# things are looking pretty bad
	# either there are > 99 runs, or batch numbers > 999.
	# or two batches are inexplicably overlapping	
	#
	# NOTE(review): add[] is not reset here, so when this plan is entered
	# after plan A or B failed, the search below starts from their offsets.

	# clear the "taken" registry
	for(thing in taken) taken[thing] = "";
	# first batch is always "taken"
	taken[1]=1;
	
	# Ideas?

	# closest 100+x packing?
	for(wave=1; wave<=waves; ++wave)
	{
	    for(run=1; run <= runs; ++run)
	    {
		if(wavenumber[Wave[start[run]]] == wave) 
		{
		    # search for a clear block of batch numbers,
		    # moving the offset up in steps of 100 until nothing collides
		    clear = 0
		    while(! clear)
		    {
		    	clear = 1;
		    	for(i=start[run];i!="";i=after[i])
		    	{
		    		if(taken[Batch[i]%1000+add[run]]) clear = 0;
		    	}
		    	if(! clear) add[run] += 100;
		    	
		    	# emergency exit
		    	if(add[run]>10000) break;
		    }
			# fix up "add" value to avoid 1000s place
		    add[run] = Batch[start[run]]%1000+add[run] - Batch[start[run]];

		    # register batch numbers, to gaurentee no conflicts
		    for(i=start[run];i!="";i=after[i])
		    {
			    taken[Batch[i]%1000+add[run]] = 1;
		    }
		}
	    }
	}
	# now double-check this batch-adding strategy
	for(thing in taken) taken[thing] = "";
	for(i=1; i <= N; ++i)
	{
	    newbatch[i] = Batch[i]%1000 + add[Run_number[i]];

	    # register batch number, to gaurentee no conflicts
	    if(taken[newbatch[i]]) {plan = "D"; break;};
	    taken[newbatch[i]] = 1;
	}
    }
    
    # plan D	- un-pretty ordinal batch assignments
    if(plan == "D")
    {
	# forget it, just remap this stuff to ordinal numbers
	# each run is shifted to begin right after the previous run's last batch
	lastbatch = 1;
	for(wave=1; wave<=waves; ++wave)
	{
	    for(run=1; run <= runs; ++run)
	    {
		if(wavenumber[Wave[start[run]]] == wave) 
		{
		    add[run] = lastbatch - Batch[start[run]] +1;
		    lastbatch = Batch[end[run]]+add[run];
		}
	    }
	}
	
	for(i=1; i <= N; ++i)
	{
	    # can't possibly need "taken" registry here
	    newbatch[i] = Batch[i] + add[Run_number[i]];
	}
    }
    
    
    ######################################
    
    # phase 5: expound on batch adding strategy
    # one "# wavelength" header per wavelength rank, then for each of its runs
    # one "# add + first to last in file" line per source file and a "run" line
    
    subrun = 1;
    for(wave=1; wave<=waves; ++wave)
    {
	print "";
	printf "# wavelength %d : %s A = %.1f eV\\n", wave, Lambda[wave], 12398.4245/Lambda[wave];
	for(run=1; run <= runs; ++run)
	{
	    if(wave == wavenumber[Wave[start[run]]])
	    {
		# run through wedge
		begin = start[run];
		lastfile = File[start[run]]
		for(i=start[run];i!="";i=after[i])
		{
		    # a change of source file closes the current sub-run
		    if(File[i] != lastfile)
		    {
#			printf "# add %5d to %5d - %5d in %s\\n", add[run], Batch[begin], Batch[before[i]], lastfile
			printf "# %5d + %5d to %5d in %s\\n", add[run], Batch[begin], Batch[before[i]], lastfile
			begin = i;
			++subrun;
		    }
		    lastfile = File[i];
		    Subrun[i] = subrun;
		}
		++subrun;
#		printf "# add %5d to %5d - %5d in %s\\n", add[run], Batch[begin], Batch[end[run]], lastfile
		printf "# %5d + %5d to %5d in %s\\n", add[run], Batch[begin], Batch[end[run]], lastfile
#		printf "#       %5d to %5d in %s +%d\\n", Batch[begin], Batch[end[run]], lastfile, add[run]

		printf "run %5d %5d to %5d\\n", Outrun[run], newbatch[start[run]], newbatch[end[run]];
	    }
	}
    }
    
    print ""
    
    # now dump the whole business
    # one line per input frame: sub-run, run, new batch number, original line
    
    print "# dump of all new batch numbers"
    for(i=1;i<=N;++i)
    {
	print Subrun[i], Run_number[i], newbatch[i], Line[i];
    }
}

EOF-parser
# mark the parser awk script file as executable for all users
chmod a+x ${tempfile}parser.awk







# The here-document below has no body, so this writes an empty x.awk file
# (">!" overwrites even when noclobber is set), marks it executable, and then
# removes it again.
cat << EOF-x >! ${tempfile}x.awk
EOF-x
chmod a+x ${tempfile}x.awk
rm -f ${tempfile}x.awk

# jump to the Return_Unwrap_Awk_Scripts label (not visible in this part of the file)
goto Return_Unwrap_Awk_Scripts

# not reached when the goto above succeeds
exit











Problem:
################################################################################

 #####   #####    ####   #####   #       ######  #    #
 #    #  #    #  #    #  #    #  #       #       ##  ##
 #    #  #    #  #    #  #####   #       #####   # ## #
 #####   #####   #    #  #    #  #       #       #    #
 #       #   #   #    #  #    #  #       #       #    #
 #       #    #   ####   #####   ######  ######  #    #

################################################################################
#	Help routine
#
#	Prints the usage message below.  The here-document delimiter is
#	unquoted, so $0 in the text is replaced by the name this script was
#	invoked with.  There is no exit or goto after the message, so control
#	continues into the Cleanup label further down.
################################################################################
cat << EOF

usage: $0 [sentence about your project]

where:
[sentence about your project]	is something like:
files called */raw.mtz are 2A data of a 22kD protein with six metal sites per chain
OR
*/*/raw.mtz is from a crystal of peptide RMKQLEDKVEELLSKNYHLENEVARLKKLVG

We can figure it out from there.

you can also keep all this information in a file, and give that file
to $0

EOF













Cleanup:
# Final exit path: remove every file whose name starts with ${tempfile}
# unless the DEBUG variable is set (so the scratch files can be inspected);
# output and errors from rm are discarded.
if(! $?DEBUG) rm -f ${tempfile}* >& /dev/null

echo "Thank you, drive through! "

# nothing below this exit is executed
exit


# the Future

# TODO

- keep track of reindexing before run, and for each input file
- update solve.com at end?
- review failure modes
- new ginger
- intelligent scaleit.com
- merge all data in one column?
- export optimizations to script?
- allow command-line choice of reference
- review flow-of-control
- check remove/rename/reference input
- Online Help
- simplify if only one wavelength (no diso data)
- read x-plor files?


# WISH LIST

- fix reindexing bug in sort_everything.com
- postrefine
- improve auto-metal thing
- better indication of "bad frames" ("local" deviation for discontinuities?)
- keep track of Rmerge (or something) when making decisions
- assign wavelength names from edge??
- set up wARP
- figure out what the metal's peak f'' is for value in SOLVE

