#! /bin/csh -f
#
echo "Refmacer Elves v 1.2.5     Just getting started.   James Holton 8-8-04"
echo ""
#
#
#   Universal (more-or-less) setup of REFMAC
#
#	This script auto-detects chain IDs and chain end information in the
#	PDB file, and sets up refmac/protin/arpp appropriately
#
#	As long as your PDB is formatted properly for refmac: 
#	- Chain ID at column 22
#	- acetyl group is: 
#ATOM      1  CT2 ARG A   1      71.507  24.225  20.829  1.00 51.12   6       C  
#ATOM      2  CT1 ARG A   1      70.031  24.413  20.903  1.00 49.18   6       C  
#ATOM      3  OT  ARG A   1      69.217  23.895  20.123  1.00 50.52   8       O  
#ATOM      4  N   ARG A   1      69.934  25.196  21.953  1.00 47.52   7       N  
#	- multiple conformers are:
#ATOM     31  N   SER A   3      69.379  26.088  17.587  1.00 36.99   7       N  
#ATOM     32  CA  SER A   3      68.368  25.330  16.881  1.00 38.70   6       C  
#ATOM     33  C   SER A   3      66.970  25.474  17.504  1.00 38.53   6       C  
#ATOM     34  O   SER A   3      65.969  25.505  16.775  1.00 38.11   8       O  
#ATOM     35  CB  SER A   3      68.741  23.848  16.842  1.00 39.47   6       C  
#ATOM     36  OG ASER A   3      67.636  22.917  16.373  0.50 39.89   6       C  
#ATOM     37  OG BSER A   3      67.980  23.119  15.733  0.50 41.31   6       C  
#	this script should be able to tell REFMAC what it means
#
#
#
#
#	Things you will need:
#	the programs listed below
#
################################################################################
#
# non-ccp4 acessory programs used
#
set MAPMAN =  /programs/rave/lx_mapman

goto Find_awk
Return_Find_awk:

# nice symbols, but may not be portable
set ANG = `echo "" | nawk 'BEGIN{printf "\305"}'`
set DEG = `echo "" | nawk 'BEGIN{printf "\260"}'`
#set ANG = "A"
#set DEG = "deg"

if(! $?CCP4) then
    echo -n "Attempting to set up CCP4 ... "

    set ccp4setup = ""
    foreach place ( /programs/xtal/ccp4_3.4/ /usr/xtal/CCP4_v3.4/ /programs/xtal /programs/ /usr/xtal /usr/local /usr/ )
	if((! -e "$ccp4setup")&&(-e "$place")) then
	    # look for setup scripts here
	    set ccp4setup = `find ${place} -name ccp4.setup |& nawk '/ccp4.setup$/{print $NF}' | tail -1`
	endif
        if((-e "$ccp4setup")&&(! $?CCP4)) then
            source $ccp4setup
            setenv CCP4_SCR    `pwd`/temp
            setenv BINSORT_SCR `pwd`/temp
            echo "using $ccp4setup"
        endif
    end
endif
if(! $?CCP4) then
    echo "failed."
    echo "Please ask your sysadmin how to set up CCP4, "
    echo "Or go to: netscape http://www.dl.ac.uk/CCP/CCP4/main.html"
    echo "about getting the CCP4 program suite."
    echo "and run $0 again."
    
    echo "If you have Red Hat Linux, you can get ccp4 by typing:"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-lib-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-progs-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-etc-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-examples-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-doc-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-manual-3.5.1-5.i386.rpm"
    echo "rpm -i http://imsb.au.dk/~mok/linux/dist/rpms/ccp4-html-3.5.1-5.i386.rpm"
    exit 9
    set CCP4_LIB
endif
# CCP4 v4.x renamed arpp
set arpp = arpp
which $arpp >& /dev/null
if($status) then
    set arpp = arp_warp
endif

# CCP4 v5.x renamed refmac
set refmac = refmac
which $refmac >& /dev/null
if($status) then
    set refmac = refmac5
endif


setenv CCP4_OPEN       UNKNOWN
# make sure we can write to scratch directories
if(! $?CCP4_SCR) setenv CCP4_SCR .
if(! $?BINSORT_SCR) setenv BINSORT_SCR .

touch ${CCP4_SCR}/this$$ >& /dev/null
if($status) then
    # safest to do this
    setenv CCP4_SCR .
endif
rm -f ${CCP4_SCR}/this$$ >& /dev/null

touch ${BINSORT_SCR}/this$$ >& /dev/null
if($status) then
    # safest to do this
    setenv BINSORT_SCR .
endif
rm -f ${BINSORT_SCR}/this$$ >& /dev/null


# check that current directory is writable
touch ./this$$ >& /dev/null
if($status) then
    # can't write to current directory!
    chmod u+w . >& /dev/null
    touch ./this$$ >& /dev/null
    if($status) then
	# can't chmod current directory either
	echo "ERROR! We can't write to this directory!"
	pwd
	echo "Please cd to the place you want to process your data, and"
	echo "then run $0 again."
	exit 9
    else
	# warn user about what we did
	echo "Had to make current directory writable:"
	echo "chmod u+w ."
    endif
    rm -f ./this$$ >& /dev/null
endif
rm -f ./this$$ >& /dev/null

# no dumping! 
limit coredumpsize 0

if(! -e scripts) mkdir scripts
if(! -e temp)    mkdir temp
if(! -e maps)    mkdir maps
if(! -e logs)    mkdir logs
if(! -e pdb)     mkdir pdb
if(! -e o)       mkdir o

#
################################################################################
#
#  Set default parameters

set scriptname     = ./scripts/refmac.com
set convergescript = ./scripts/converge.com
set arpscriptname  = ./scripts/arp.com
set fftscriptname  = ./scripts/maps.com
set RMSD           = ./scripts/rmsd

set newpdbname = ./pdb/starthere.pdb
set mtzfile    = ./refine.mtz

set F	       = ""
set SIGF       = ""
set hires      = ""
set lores      = ""
set HL	       = ""
set dicfile    = "../protin.SeMET.dic"
set dicfile    = ""

set xyzlim   = ""


# Cycle Numbers for running arp, or quitting 
# converge will do "refcycle" rounds of refmac refinement per arp cycle
set SUPERCYCLE = 10
# ARP will run every "arpcycle" refmac cycles (regardless of convergence)
set arpcycle  = 1000
# this script will quit after "maxcycle" total refmac cycles
set maxcycle  = 1000
set cycle     = ""

# chain to which ARP will add water atoms (default, water chain in your PDB)
set ARPCHAIN  = "S"

###############################################################################
###############################################################################
###############################################################################
###############################################################################
###############################################################################
###############################################################################
###############################################################################
###############################################################################
###############################################################################

set tempfile = $CCP4_SCR/refmactemp
set SG       = ""
set pdbin    = ""

# scan command line
foreach arg ( $* )
    if($arg =~ *.mtz) set mtzfile = $arg
    if($arg =~ *.pdb) set pdbin = $arg
    if($arg =~ *.brk) set pdbin = $arg
    if(($arg =~ [1-9]*)&&($arg =~ *[0-9])) set cycle = $arg
    if($arg == nice)  renice -n 40 $$
    if($arg =~ *.dic) set dicfile = $arg
end

if(! -e "$pdbin") then
    echo "ERROR: $pdbin does not exist! "
endif

if(! -e "$mtzfile") then
    echo "ERROR: $mtzfile does not exist! "
endif

if((! -e "$pdbin")||(! -e "$mtzfile")) then
cat << EOF

usage: $0 input.pdb [./refine.mtz] [converge] [nice]

where:
	input.pdb 	- the PDB file you want to refine
	refine.mtz	- the file containing Fs and phases you want to refine against

	converge	- wait for refmac to converge, then run arp, (and repeat)
	nice		- run refmac/arp as "nice" processes

--------

logs/refmac*.log	- will be the REFMAC logs
 pdb/refmac*.pdb	- will be the refined PDB files
   o/*.omap		- will be the sigmaa maps (O format)

EOF
    exit 1
endif

#######################################################################################
#   check for dictionary file
#
if(! -e "$dicfile") then
    if("$dicfile" != "") then
	echo "WARNING: $dicfile does not exist! "
	echo "         using $CLIBD/protin.dic "
    endif
    set dicfile = "$CLIBD/protin.dic"
endif

#######################################################################################
#   Unwrap utility scripts
#
goto Unwrap_awk_Scripts
# ${tempfile}mtzstuff.awk
# ${tempfile}reformatpdb.awk
RetrnUnwrap_awk_Scripts:


#######################################################################################
#   Info from MTZ file
#
# get Cell and SG from MTZ file
echo "go" | mtzdump HKLIN $mtzfile >! ${tempfile}mtzdump
if($status) then
    echo "unable to read $mtzfile"
    echo "make sure $mtzfile exists, and is not corruped, and then run"
    echo "$0 again."
    exit 255
endif
# do this now
set HKLs = `nawk '/Number of Reflections/{print $NF}' ${tempfile}mtzdump`

# now turn the dump into something more readable
cat ${tempfile}mtzdump |\
nawk -f ${tempfile}mtzstuff.awk >! ${tempfile}
mv ${tempfile} ${tempfile}mtzdump

set SG = `nawk '/^SYMM/{print $NF}' ${tempfile}mtzdump`
set CELL = `nawk '/^CELL/{print substr($0, 5)}' ${tempfile}mtzdump`
#
# get high and low resolution limits
if("$hires" == "") set hires = `nawk '/^RESO/{print $NF}' ${tempfile}mtzdump`
if("$lores" == "") set lores = `nawk '/^RESO/{print $2}' ${tempfile}mtzdump`
#
# get "best" F and SIGF in $mtzfile
cat ${tempfile}mtzdump |\
nawk '/^COL/ && $3=="F"{comp[$2]=$NF} /^F:/{print $0, $NF*comp[$2]}' |\
sort -n +4 | tail -1 |\
cat >! ${tempfile}
set    F = `nawk '{print $2}' ${tempfile}`
set SIGF = `nawk '{print $3}' ${tempfile}`
rm -f ${tempfile} >& /dev/null
#
# get last four HL coefficients
if("$HL" == "") then
#    nawk '/^COL/ && $3=="A"{print $2}' ${tempfile}mtzdump |\
#    tail -4 |\
#    nawk '/HLA/{tag="HLA"} /HLB/{tag="HLB"} \
#         /HLC/{tag="HLC"} /HLD/{tag="HLD"} \
#	  {print tag "=" $1}' |\
#    cat >! ${tempfile}
    nawk '/^COL/ && $3=="A"{print $2}' ${tempfile}mtzdump |\
    tail -4 |\
    nawk 'NR==1{tag="HLA"} NR==2{tag="HLB"} \
          NR==3{tag="HLC"} NR==4{tag="HLD"} \
	  {print tag "=" $1}' |\
    cat >! ${tempfile}
    
    set HL = `cat ${tempfile}`
    if($#HL != 4) then
	echo "WARNING: could not find HL coefficients in $mtzfile! "
	set HL = ""
    endif
    rm -f ${tempfile} >& /dev/null
endif
#
# check for free-R flags
set temp = `nawk '/^COL/ && $3=="I" && $2=="FreeR_flag" {print $2}' ${tempfile}mtzdump`
if("$temp" == "") then
    # more dangerous, but probably okay
    set temp = `nawk '/^COL/ && $3=="I" {print $2}' ${tempfile}mtzdump`
endif
if("$temp" == "") then
    echo "WARNING: could not find FreeR_flag in $mtzfile "
    echo "         Rfree will not be used! "
    echo "         please use "\"FreeR_flag\"" as a label for them. "
    echo "         believe me, you'll be glad you did."
    set FREE = ""
else
    set FREE = "FREE=$temp"
endif
#
rm -f ${tempfile}mtzdump >& /dev/null
rm -f ${tempfile}mtzstuff.awk >& /dev/null
#
#
#
#######################################################################################
#
#   Examine the PDB
#
# first, run the PDB through a cannonizing filter
cat $pdbin |\
 nawk -f ${tempfile}reformatpdb.awk |\
 nawk '! /^ATOM/ || /^ATOM/ && substr($0,14,1) !~ /[1-9H]/' |\
 cat >! ${tempfile}fixcell.pdb

pdbset XYZIN ${tempfile}fixcell.pdb XYZOUT $newpdbname << EOF-fixcell >& /dev/null
CELL $CELL
EOF-fixcell

rm -f ${tempfile}fixcell.pdb

# decide if we can refine individual Bs
set B_REFINE = "ISOTROPIC"
set ATOMs = `nawk '/^ATOM/' $pdbin | wc -l`
if( $HKLs < ( 4 * $ATOMs ) ) set B_REFINE = "OVERALL"
#
# check the unit cell too
echo "MTZ $CELL" |\
nawk '{printf "MTZ %8.3f %8.3f %6.1f %6.1f %6.1f %6.1f\n", $2, $3, $4, $5, $6, $7}' |\
cat >! ${tempfile}cell
nawk '/^CRYST/' $pdbin |\
nawk '{printf "PDB %8.3f %8.3f %6.1f %6.1f %6.1f %6.1f\n", $2, $3, $4, $5, $6, $7}' |\
cat >> ${tempfile}cell

cat ${tempfile}cell |\
nawk 'NR==1{for(i=2;i<=NF;++i){c[i]=$i;}}\
  NR==2{for(i=2;i<=NF;++i){d=sqrt(($i-c[i])^2); if(maxd<d)maxd=d;}}\
  END{printf "%d", maxd*100}' |\
cat >! ${tempfile}
set temp = `head -1 ${tempfile}`
if("$temp" == "") set temp = 0
rm -f ${tempfile}
    
if($temp > 3) then
    echo "WARNING: unit cells"
    cat ${tempfile}cell
    echo "are kind of different! "
endif
rm -f ${tempfile}cell

#
# get chains from PDB
cat $newpdbname |\
nawk '/^ATOM/{c=substr($0, 22, 1); if(c==" ")c="_"; print c}' |\
nawk '$1 != c{++n; print "CHNNAM ID", $1, "CHNTYP", n; c=$1}' |\
cat >! ${tempfile}chnnam
set chains = `nawk '{print $3}' ${tempfile}chnnam`

# determine chain ends
echo -n "" >! ${tempfile}chntyp
foreach chain ( $chains )

    # get the exact chain number we're going to tell protin
    set i = `nawk -v chain="$chain" '$3==chain{print $NF}' ${tempfile}chnnam`
    
    # "_" represents no chain ID
    if("$chain" == "_") set chain = " "
    
    # extract the chain, for ease of use
    cat $newpdbname |\
    nawk -v chain="$chain" '/^ATOM/ && substr($0, 22, 1)==chain' |\
    cat >! ${tempfile}chain_$chain
    
    # detect non-protein chains
    set temp = `nawk 'substr($0, 13, 3) == " CA"' ${tempfile}chain_$chain | head -1`
    if("$temp" == "") then
	# no alpha carbons, so must be a non-protein chain
	set CHNTYP = "NON"
	
	# see if this is a water chain
	set temp = `nawk 'substr($0, 13, 2) != " O"' ${tempfile}chain_$chain | head -1`
	if("$temp" == "") then
	    set CHNTYP = "WAT"
	    # use last water chain as the ARP chain
	    set ARPCHAIN = $chain
	endif
	
	echo "CHNTYP $i $CHNTYP" >> ${tempfile}chntyp
	
	# skip to next chain
	rm -f ${tempfile}chain_$chain
	continue
    endif
    
    # get terminal residues
    cat ${tempfile}chain_$chain |\
    nawk '{print substr($0, 23, 5), substr($0, 18, 3)}'  |\
    cat >! ${tempfile}
    set Nter = `head -1 ${tempfile}`
    set Cter = `tail -1 ${tempfile}`
    
    # get capping groups: 5=acetyl, 4=formyl, 3=amino, 2=carboxyl
    cat ${tempfile}chain_$chain |\
    nawk '{Atom=substr($0, 13, 4);\
    if(Atom == " CT1"){print "5"};\
    if(Atom == " CT "){print "4"};\
    if(Atom == " N  "){print "3"};\
    }' >! ${tempfile}
    set Ncap = `head -1 ${tempfile}`
    rm -f ${tempfile}
    # default to amino terminus
    if("$Ncap" == "") set Ncap = "3"
    
    # COOH will never hurt
    set Ccap = "2"
    
    # send this keycard to the file
    echo "CHNTYP $i NTERM $Nter $Ncap CTERM $Cter $Ccap" >> ${tempfile}chntyp
    
    
    
    
    ############################################
    # detect secondary structure
    cat ${tempfile}chain_$chain |\
    nawk -f ${tempfile}phipsi.awk |\
    cat >! ${tempfile}phipsi
    
    
    # initialize temp file
    echo -n "" >! ${tempfile}seco
    
    # alpha helix
    cat ${tempfile}phipsi |\
    nawk -v phi=-64 -v psi=-40 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 15-degree error cutoff
    nawk '$NF < 15' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 1    # helix\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco
    
    
    
    
    # parallel beta sheet
    cat ${tempfile}phipsi |\
    nawk -v phi=-119 -v psi=113 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 5    # B-sheet\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco

    
    
    
    # antiparallel beta sheet
    cat ${tempfile}phipsi |\
    nawk -v phi=-139 -v psi=135.0 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 6    # ap B-sheet\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # 3-10 HELIX (3.0/10)
    cat ${tempfile}phipsi |\
    nawk -v phi=-75.5 -v psi=-4.5 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 2    # 3-10 helix\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # PI HELIX (4.4/16)
    cat ${tempfile}phipsi |\
    nawk -v phi=-57.1 -v psi=-69.7 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 3    # pi helix\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # COLLAGEN-TYPE HELIX
    cat ${tempfile}phipsi |\
    nawk -v phi=-64.0 -v psi=145.0 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 4    # collagen helix\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # CLASSIC BETA-BULGE 1
    cat ${tempfile}phipsi |\
    nawk -v phi=-95.0 -v psi=-65.0 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 7    # type 1 B-bulge\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # CLASSIC BETA-BULGE 2
    cat ${tempfile}phipsi |\
    nawk -v phi=-130.0 -v psi=150.0 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 8    # type 2 B-bulge\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # BETA-BEND I (1-4)  2
    cat ${tempfile}phipsi |\
    nawk -v phi=-70.0 -v psi=-30.0 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 9    # B-bend I \n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    #BETA-BEND I (1-4)  3
    cat ${tempfile}phipsi |\
    nawk -v phi=-90 -v psi=10 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 10   # B-bend I \n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # BETA-BEND II (1-4) 2
    cat ${tempfile}phipsi |\
    nawk -v phi=-60 -v psi=130 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 11   # B-bend II \n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # BETA-BEND II (1-4) 3
    cat ${tempfile}phipsi |\
    nawk -v phi=80 -v psi=0 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "#CHNTYP %d SECO %4d %4d 12   # B-bend II \n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # GAMMA-TURN (1-3) 1
    cat ${tempfile}phipsi |\
    nawk -v phi=172.0 -v psi=128.0 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 13   # gamma turn\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # GAMMA-TURN (1-3) 2
    cat ${tempfile}phipsi |\
    nawk -v phi=68.0 -v psi=-61 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 14   # gamma turn\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco




    # GAMMA-TURN (1-3) 3
    cat ${tempfile}phipsi |\
    nawk -v phi=-131 -v psi=162 'NF>6{++n; num[n]=$3; \
      dev[n]=sqrt((($5-phi+3780)%360-180)^2) + sqrt((($7-psi+3780)%360-180)^2)}\
      END{dev[0]=dev[-1]=dev[-2]=dev[1];dev[n+3]=dev[n+2]=dev[n+1]=dev[n];\
    for(i=1;i<=n;++i){smooth[i]=(dev[i-2]+dev[i-1]+dev[i]+dev[i+1]+dev[i+2])/5};\
    for(i=1;i<=n;++i){print num[i], dev[i], smooth[i]}}' |\
    cat >! ${tempfile}scores
    
    # choose 30-degree error cutoff
    nawk '$NF < 30' ${tempfile}scores |\
    nawk '$1 != nextN{if(nextN!="")printf "%d ", nextN-1; printf "%d ", $1} \
       {nextN=$1+1} END{print nextN-1}' |\
    nawk -v chnum=$i '{for(i=1;i<NF;i+=2){\
    printf "CHNTYP %d SECO %4d %4d 15   # gamma turn\n", chnum, $i, $(i+1)}}' |\
    nawk '$5-$4 > 1' >> ${tempfile}seco

    
    
    # only do top 50 secondary structure constraints
    cat ${tempfile}seco |\
    nawk '{printf "%4d %s\n", $5-$4, $0}' |\
    sort -n +0 -1 |\
    head -50 |\
    nawk '{print substr($0, 6)}' |\
    sort +1n -2 +3n -4 >! ${tempfile}
    mv ${tempfile} ${tempfile}seco
    
    # cis-peptide bonds
    cat ${tempfile}phipsi |\
    nawk -v pep=180.0 'NF>6{++n; num[n]=$3; \
      dev=sqrt((($9-pep+3780)%360-180)^2);\
      print $3, $9, dev}' |\
    cat >! ${tempfile}scores
    
    # choose 120-degree error cutoff
    nawk '$NF > 120{print $1}' ${tempfile}scores |\
    sort -un |\
    nawk '{printf "%d ", $1}' |\
    nawk -v chnum=$i '{\
    print "CHNTYP",chnum, "CISPEP", NF,"   " $0, "    # cis-peptide bonds"}' |\
    cat >> ${tempfile}seco
    
    
    
    # intrachain disulphides?
    cat ${tempfile}chain_$chain |\
    nawk 'substr($0, 13, 3)==" SG" && substr($0, 18, 3)=="CYS"{\
    print substr($0, 23, 4), substr($0, 31, 8), substr($0, 39, 8), substr($0, 47, 8)}' |\
    nawk '{++n; res[n]=$1; X[n]=$2; Y[n]=$3; Z[n]=$4; for(i=1;i<n;++i){\
         dist=sqrt((X[i]-X[n])^2 +(Y[i]-Y[n])^2 +(Z[i]-Z[n])^2);\
	 print res[i], res[n], dist}}' |\
    nawk '$3 < 5 {printf "%s %s ", $1, $2}' |\
    nawk -v chnum=$i 'NF>=2{\
    printf "CHNTYP %d DISU %2d %s    # disulphide bridges\n", chnum, NF/2, $0}' |\
    cat >> ${tempfile}seco
    
    # now reorganize all this stuff and put it in the chntyp list
    sort +1n -2 +3n -4 ${tempfile}seco |\
    awk '{print "#" $0}' |\
    cat >> ${tempfile}chntyp

    # clean up
    rm -f ${tempfile}chain_$chain >& /dev/null
    rm -f ${tempfile}seco         >& /dev/null
    rm -f ${tempfile}scores       >& /dev/null
    rm -f ${tempfile}phipsi       >& /dev/null
end
#
# organize secondary-structure cards
#
# look for inerchain disulphides
cat $newpdbname |\
nawk 'substr($0, 13, 3)==" SG" && substr($0, 18, 3)=="CYS"{\
c=substr($0, 22, 1); if(c==" ")c="_";\
print c, substr($0, 23, 4), substr($0, 31, 8), substr($0, 39, 8), substr($0, 47, 8)}' |\
nawk '{++n; c[n]=$1; res[n]=$2; X[n]=$3; Y[n]=$4; Z[n]=$5; for(i=1;i<n;++i){\
    dist=sqrt((X[i]-X[n])^2 +(Y[i]-Y[n])^2 +(Z[i]-Z[n])^2);\
    print c[i], res[i], c[n], res[n], dist}}' |\
nawk '$NF < 5 && $1 != $3{++count[$1 " " $3]; \
    ds[$1 " " $3] = ds[$1 " " $3] " " $2 " " $4}\
    END{for(pair in ds){\
printf "DISULPHIDE %d DISU %s %s    # interchain disulphide bridges\n",\
 count[pair], pair, ds[pair]}}' |\
cat >> ${tempfile}chntyp


# make sure there's at least one water chain
grep "WAT" ${tempfile}chntyp >& /dev/null
if($status) then
    @ i = ( $i + 1 )
    echo "CHNNAM ID $ARPCHAIN CHNTYP $i" >> ${tempfile}chnnam
    echo "CHNTYP $i WAT" >> ${tempfile}chntyp
endif
#

# Rule-of-Thumb formula given in ARP manual
set ADDATOMS = `grep ATOM $newpdbname | wc -l | nawk -v hires=$hires '{printf "%d", 0.08*$1/(3*hires)}'`
# other rule-of-thumb in ARP manual (remove half as much as you add)
set REMATOMS = `echo "$ADDATOMS" | nawk '{printf "%d", $1/2}'`


set temp = "with one, overall B factor "
if("$B_REFINE" == "ISOTROPIC") set temp = "with individual B factors"
echo "refining $ATOMs atoms in $pdbin $temp "
echo "against $HKLs values of ${F}/${SIGF} $HL in $mtzfile"
echo "protein layout:"
cat ${tempfile}chnnam


# clean up
rm -f ${tempfile}phipsi.awk >& /dev/null
rm -f ${tempfile}reformatpdb.awk >& /dev/null

goto Deploy_scripts
################################################################################
################################################################################
# END OF SETUP ROUTINES
################################################################################
################################################################################

Find_awk:
# make sure nawk works
set program = "nawk"
foreach name ( nawk awk gawk )
    test -x "$program"
    if(! $status) break
    
    set possibilities = `which $name |& grep -v ' not in ' | tail -1`
    foreach file ( $possibilities )
	test -x "$file"
	if(! $status) then
	    # test for desired functionality (change this?)
	    set temp = `echo "1.54" | $file '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
	    if("$temp" == 856) then
		set program = "$file"
		break
	    endif
	endif
    end
    unset possibilities
end
test -x "$program"
if($status) then
    set program = "awk"
    foreach place ( /bin /usr/bin /usr/local/bin  )
	test -x "$program"
	if(($status)&&(-e $place)) then
	    # keep looking
	    set files = `ls -1L ${place} |& grep "$program" |& sort -4n |& head -20 `
	    foreach file ( $files )
		# test for desired functionality
		set temp = `echo "1.54" | $file '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
		if("$temp" == 856) then
		    set program = "$file"
		    break
		endif
	    end
	endif
    end
endif

# agressively search for nawk in likely places
test -x "$program"
if($status) then
    echo -n "Looking for $program "
    foreach place ( /bin /usr/bin /usr/local/bin /usr / )
	test -x "$program"
	if(($status)&&(-e $place)) then
	    if("$place" == "/") echo -n "uhh"
	    
	    # use find to get candidate files
	    set files = `find $place -name '*'$program \( -type l -o \( -type f -size +10000c \) \) -perm -1 -print |& egrep -v "^find:" |& head -20`
	    foreach file ( $files )
		# test for desired functionality
		set temp = `echo "1.54" | $file '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
		if("$temp" == 856) then
		    set program = "$file"
		    break
		endif
	    end
	endif
	
	# entertainment
	echo -n "."
    end
endif

# check that we found the right awk program
set temp = `echo "1.54" | $program '{printf("%3d", 3147.7 * ( $1 )^(-3.014))}' |& cat`
if("$temp" == 856) then
    # set up this awk program as nawk
    set nawk = "$program"
    alias nawk $nawk
else
    echo "Dagnabbit!  We can't find a suitable awk program.  What kind of unix is this? "
    echo "Elves may not be able to work."
    set nawk = /bin/awk
    alias nawk awk
endif

goto Return_Find_awk

































Deploy_scripts:
# Create the REFMAC script
cat << EOF-refmacscript >! $scriptname
#! /bin/csh -f
#
#	Automatically generated REFMAC script for:
#	 $pdbin and $mtzfile
#
###############################################################################
#   Set-up stuff
#
alias nawk $nawk
#
set pdbin      = $newpdbname
set pdbout     = ./pdb/refmac.pdb
set mtzfile    = $mtzfile
set tempfile   = \${CCP4_SCR}/refmactemp
#
#
# scan command line
foreach arg ( \$* )
    if(\$arg =~ *.mtz) set mtzfile = \$arg
    if(\$arg =~ *.pdb) set pdbin = \$arg
    if(\$arg =~ *.brk) set pdbin = \$arg
    if(\$arg == nice)  renice -n 40 \$\$
end
#
# need these directories
if(! -e logs)    mkdir logs
if(! -e pdb)     mkdir pdb
if(! -e temp)    mkdir temp

# refmac version-spanning options
if("$refmac" == "refmac5") then
    goto refmac
endif

protin:
###############################################################################
#
#   Run protin  to set up geometric restraints
#
\${CCP4_BIN}/protin \
XYZIN      \$pdbin \
DICTPROTN  $dicfile 		\
PROTOUT    \${tempfile}protout.dat		\
PROTCOUNTS \${tempfile}counts.dat		\
<< eof
# automatically determined chains
`cat ${tempfile}chnnam`
#capping groups: 5=acetyl, 4=formyl, 3=amino, 2=carboxyl
`cat ${tempfile}chntyp`

# NCS chains A,B,and C from residues 2-33, 40-77 code 2
# code 1 2 3 4 5 6 
# main t t t m m l
# side t m l m l l
#NONX 3 CHNID A B C NSPANS 2    2 33    2    40 77     2

# of atoms in peptide bond 5 -> Ca to Ca (default)
PEPP 5
SYMM $SG
#VDWRadii 1 CA 7 3.8
# VDW cuttoff distance (5 default)
VDWCUT 5
END
eof
if(\$status) then
    exit 2
endif

refmac:
###############################################################################
#
# Refmac step. Refine
#
$refmac \
HKLIN      \$mtzfile \
HKLOUT     ./refmacout.mtz \
PROTOUT    \${tempfile}protout.dat \
PROTCOUNTS \${tempfile}counts.dat \
PROTSCR    \${tempfile}counts.scr \
XYZIN      \$pdbin \
XYZOUT     \${tempfile}refmacout.pdb \
<< END-OF-REFMAC 
LABIN FP=$F SIGFP=$SIGF  $FREE  $HL
LABO FC=FC PHIC=PHIC    FWT=2FOFCWT PHWT=PH2FOFCWT -
                     DELFWT=FOFCWT  PHDELWT=PHFOFCWT
# RESTRAINED, RIGID, or IDEAL (geometry only)
REFI TYPE RESTrained 
# default: refine agains full resolution range
#REFI RESOLUTION  $lores $hires
# use maximum-likelihood, conjugate-direction refinement (with phases)
REFI RESIDUAL MLKF METHOD CGMAT PHASED
# One, overall B (OVERALL) or individual Bs (ISOTROPIC)
REFI BREF $B_REFINE

#weight of X-ray over geometry (default: WEIG EXPE MATR 0.5)
WEIG EXPE MATR 0.5
# No torsional restraints (combine with low WEIG ? )
#TORSION 0.0
# Bfactor restraints weight
#BFAC 1.0
# NCS restraints weight
#NCSR 1.0

#Blur the phases?
#PHASE SCBL 1 BBLUR 0
# combine experimental phases with calculated ones in output maps
REFI PHASE SIGMAcalc

# Fo-Fc SCALING 
#Scale Fc to Fo with 2-Gaussian fit (TYPE SIMPLE is 1-Gaussian)
SCALE TYPE BULK 
# use an anisotropic, overall B factor (for anisotropic diffraction)
SCALE LSSC ANIS 
# low-res cutoff for scaling (intended for SCALE TYPE SIMPLE)
#SCALE RESO 4.5 $hires

# fix water/protein density ratio to 1/1.35 and water B=80 in scaling
#SCALE LSSC FIXBULK SCBULK -0.74 BBULK 80
# same for SigmaA estimation (NOT recommended)
#SCALE MLSC FIXBULK SCBULK -0.5  BBULK 100

# use free-R hkls in scaling (ignored by default)
#SCALE LSSC FREE
# use experimental sigmas in scaling (ignored by default)
#SCALE LSSC EXPE


# Actual number of REFMAC refinement steps per cycle (5 default)
NCYC 5


# verboseness
MONI FEW
#MONI MEDI HBOND
#MONI MEDI CHIRAL 0.5
#MONI MANY
# 20 display bins by default
#BINS 20
end
END-OF-REFMAC
#
# update the output file
mv \${tempfile}refmacout.pdb \$pdbout
#
# Clean up
rm -f \${tempfile}protout.dat
rm -f \${tempfile}counts.dat
rm -f \${tempfile}counts.scr
#
exit


EOF-refmacscript
chmod a+x $scriptname

rm -f ${tempfile}chnnam >& /dev/null
rm -f ${tempfile}chntyp >& /dev/null















# now unwrap the arp script
cat << EOF-arpscript >! $arpscriptname
#! /bin/csh -f
#
#	$arpscriptname 	- intelligent arp script
#
#
#	this script adapts to most of arp's quirks
#	
#
alias nawk $nawk

set map_2fofc  = maps/2fofc_free.map
set map_fofc   = maps/fofc.map
set pdbin      = $newpdbname	# to add water to
set pdbout     = ./pdb/arp.pdb		# new file with water added

set tempfile   = \${CCP4_SCR}arptemp

# user can override defaults
foreach arg ( \$* )
    if("\$arg" =~ *.mtz) set refmacmtz = "\$arg"
    if("\$arg" =~ *.pdb) set pdbin     = "\$arg"
    if("\$arg" =~ *.map) then
	if("\$arg" =~ *2*) then
	    set map_2fofc = "\$arg"
	else
	    set map_fofc  = "\$arg"
	endif
    endif
end

# need these directories
if(! -e logs)    mkdir logs
if(! -e pdb)     mkdir pdb

if((! -e "\$map_2fofc")||(! -e "\$map_fofc")) then
    echo "ERROR!  no maps: \$map_2fofc AND \$map_fofc needed to run arp."
    exit 9
endif

ARP:
###############################################################################
#
# get ARP's variable names from map file
#
echo "" | mapdump MAPIN \$map_2fofc |\\
grep "Cell dimensions ..." | tail -1 |\\
     nawk '{printf("%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f\\n", \\
              \$(NF-5), \$(NF-4), \$(NF-3), \$(NF-2), \$(NF-1), \$NF)}' |\\
cat >! \${tempfile}cell.txt
set CELL = \`cat \${tempfile}cell.txt\`
#rm -f \${tempfile}dump.txt
rm -f \${tempfile}cell.txt

# change the cell in the PDB so ARP won't freak out.
pdbset XYZIN \$pdbin XYZOUT \${tempfile}arpin.pdb << EOF-fixcell >& /dev/null
CELL \$CELL
EOF-fixcell


# change all occupancies to be < 1 (or ARP will die)
cat \${tempfile}arpin.pdb |\\
nawk '{if(substr(\$0,55,6)+0>1)\\
          {print substr(\$0,1,56) "1.00" substr(\$0,61)}\\
     else {print}}' |\\
cat >! \${tempfile}arptemp.pdb
mv \${tempfile}arptemp.pdb \${tempfile}arpin.pdb


arpagain:
# Now try to run ARP
$arpp XYZIN \${tempfile}arpin.pdb \\
 MAPIN1 \$map_2fofc \\
 MAPIN2 \$map_fofc  \\
 XYZOUT \$pdbout << eof-arpp | tee \${tempfile}arp.log
MODE UPDATE WATERS
CELL \$CELL
SYMM $SG
RESOLUTION $lores $hires
! Removal based on 2mFo-DFc map
# remove about 10 waters that are below 1sigma, and merge ones closer that 2.2 A
REMOVE     ATOMS $REMATOMS CUTSIGMA 1.0 MERGE 2.2

! Addition based on mFo-DFc map
# add up to $ADDATOMS atoms to chain $ARPCHAIN, with automatic sigma cuttoff
FIND       ATOMS $ADDATOMS CHAIN $ARPCHAIN CUTSIGMA AUTO
# new atoms should be within 2.3 to 3.3 A of existing atoms, 
# but not closer than 2.3 A to another new atom (default).
#FDISTANCE  NEWOLD 2.3 3.3 NEWNEW 2.3

# do real-space refinement of waters
REFINE     WATERS
END
eof-arpp
if(\$status) goto ARPerror

# clean up after ARP
rm -f \${tempfile}arpin.pdb
rm -f \${tempfile}arpp_2fofc.map >& /dev/null
rm -f \${tempfile}arpp_fofc.map >& /dev/null

# get rid of bad water
mv \$pdbout \${tempfile}.pdb
nawk '{if(! ((substr(\$0,55,6)=="  0.00")&&(\$4=="WAT"))){print} }'\\
\${tempfile}.pdb >! \$pdbout
rm -f \${tempfile}.pdb


# sumarize for user
cat \${tempfile}arp.log |\\
nawk '/New atoms found/{print "arp found  ", \$2, "new waters"}'
cat \${tempfile}arp.log |\\
nawk '/Number of atoms will be removed/{print "and removed", \$NF, " waters"}'

echo "new pdb is in \$pdbout"
rm -f \${tempfile}arp.log
exit


ARPerror:

echo "ARP died! \\07 "
tail -10 \${tempfile}arp.log

# most common problem: ARPP won't swallow map 
if(! \$?FIXED) then
    set FIXED
    # fix this, and go again
    set xyzlim = \`nawk 'BEGIN{print "xyzlim"} /Map limits/{print \$(NF-1), \$NF}' \${tempfile}arp.log\`
    
    if(\$#xyzlim == 7) then
	echo "fixed error: \$xyzlim"
	echo "please edit $fftscriptname and change:"
	grep xyzlim $fftscriptname | head -1
	echo "to"
	echo "set xyzlim = "\"\$xyzlim\"
	echo "to make this script run a bit faster"
	echo "extending the maps."
	goto extend
    endif
    
    echo "don't know why that happened! "
endif

echo "Stopping. "
echo ""
rm -f \${tempfile}arp.log

exit 8

extend:
mapmask MAPIN \$map_2fofc MAPOUT \${tempfile}arpp_2fofc.map << EOF
\$xyzlim
END
EOF
set map_2fofc = \${tempfile}arpp_2fofc.map
mapmask MAPIN \$map_fofc MAPOUT \${tempfile}arpp_fofc.map << EOF 
\$xyzlim
END
EOF
set map_fofc = \${tempfile}arpp_fofc.map

goto arpagain

EOF-arpscript
chmod a+x $arpscriptname

























# now unwrap fft script
cat << EOF-fftscript >! $fftscriptname
#! /bin/csh -f
#
#	$fftscriptname 	- make refmac's maps
#
#
#	use this script to get the "current" map out of
#	refmacout.mtz, even if refmac is still running
#
alias nawk $nawk

set refmacmtz  = ./refmacout.mtz
set pdbin      = $newpdbname	# PDB to "cover"
set xyzlim     = "$xyzlim"			# important for ARP maps

# need this to make O maps
set MAPMAN = $MAPMAN

set tempfile   = \${CCP4_SCR}ffttemp

# user override defaults
foreach arg ( \$* )
    if("\$arg" =~ *.mtz) set refmacmtz = "\$arg"
    if("\$arg" =~ *.pdb) set pdbin     = "\$arg"
    if("\$arg" =~ *mapman*) set MAPMAN     = "\$arg"
end

if(! -e "\$refmacmtz") then
    echo "ERROR: no \$refmacmtz ! "
    exit 9
endif

FFT:
###############################################################################
#
# make maps.
#
#  Sigmaa style 2mfo-dfc map with FREE R removed (for ARP)
#
echo "making arp's maps/2fofc_free.map"
fft hklin ./refmacout.mtz mapout \${tempfile}.map <<eof | tee \${tempfile}fft.log
title Sigmaa style 2mfo-dfc map calculated with refmac coefficients
labi F1=2FOFCWT PHI=PH2FOFCWT  FREE = FreeR_flag
end
eof
# take a stab at predicting arp's acceptable map
if("\$xyzlim" == "") then
    # add one extra grid point for ARP
    set xyzlim = \`nawk '/Map limits in grid points on xyz/{print "xyzlim", \$(NF-5),\$(NF-4)+1,\$(NF-3),\$(NF-2)+1,\$(NF-1),\$NF+1}' \${tempfile}fft.log | tail -1\`
endif
rm -f \${tempfile}fft.log >& /dev/null
mapmask MAPIN \${tempfile}.map MAPOUT maps/2fofc_free.map << EOF
\$xyzlim
END
EOF
#
#
#  Sigmaa style 2mfo-dfc map _without_ FREE R removed (for O)
#
echo "making maps/2fofc.map"
fft hklin \$refmacmtz mapout maps/2fofc.map <<eof 
title Sigmaa style 2mfo-dfc map calculated with refmac coefficients
labi F1=2FOFCWT PHI=PH2FOFCWT
end
eof
#
#   Sigmaa style mfo-dfc map (FREE R already removed)
#
echo "making maps/fofc.map"
fft hklin \$refmacmtz mapout \${tempfile}.map \\
<<eof 
title Sigmaa style mfo-dfc map calculated with refmac coefficients
labi F1=FOFCWT PHI=PHFOFCWT  
end 
eof
mapmask MAPIN \${tempfile}.map MAPOUT maps/fofc.map << EOF 
\$xyzlim
END
EOF
if(! \$?debug) rm -f \${tempfile}.map



###############################################################################
# convert maps to O format
if(! -e ./o) goto done
if(! -e \$MAPMAN) then
    echo "MAPMAN unavailable! "
    echo "cannot make O maps. "
    goto pick_peaks
endif

echo "making ./o/2fofc.omap covering \$pdbin"
mapmask MAPIN maps/2fofc.map \\
    XYZIN \$pdbin \\
    MAPOUT \${tempfile}.map << eof-extend 
SCALE SIGMA
BORDER 10
eof-extend
set mapsize = \`ls -l \${tempfile}.map | awk '{printf "%d", \$5/3.9}'\`
\$MAPMAN -b mapsize \$mapsize << eof-mapman |& cat
read map1 \${tempfile}.map CCP4
mappage map1 ./o/2fofc.omap
quit
eof-mapman

echo "making ./o/fofc.omap covering \$pdbin"
mapmask MAPIN maps/fofc.map \\
        XYZIN \$pdbin \\
        MAPOUT \${tempfile}.map << eof-extend 
SCALE SIGMA
BORDER 10
eof-extend
set mapsize = \`ls -l \${tempfile}.map | awk '{printf "%d", \$5/3.9}'\`
\$MAPMAN -b mapsize \$mapsize << eof-mapman |& cat
read map1 \${tempfile}.map CCP4
mappage map1 ./o/fofc.omap
quit
eof-mapman
rm -f \${tempfile}.map >& /dev/null
###############################################################################
pick_peaks:
# list difference peaks
echo "picking peaks in maps/fofc.map"
peakmax MAPIN maps/fofc.map XYZOUT \${tempfile}pick.pdb << eof-pick 
THRESHOLD RMS 4 NEGATIVES
OUTPUT BROOKHAVEN
END
eof-pick

# convert to an O macro
cat \${tempfile}pick.pdb |\\
nawk '/^ATOM/{print substr(\$0, 61, 6)+0, substr(\$0, 31, 8), substr(\$0, 39, 8), substr(\$0, 47, 8)}' |\\
sort -nr |\\
nawk '{print "! ", \$1, "sigma peak"; print "centre_xyz", \$2,\$3,\$4; \\
print "sym_sphere ; ; ;";print "@map";}' |\\
cat >! \${tempfile}peaklist
rm -f \${tempfile}pick.pdb >& /dev/null

if(\$?NO_MACROS) goto done
# make some nice O macros for visiting large difference features
echo "generating o macros in ./o/"
cat \${tempfile}peaklist >! ./o/visit_peaks.omac

set temp = \`awk '/^ATOM/{++n;X+=substr(\$0, 31, 8);Y+=substr(\$0, 39, 8);Z+=substr(\$0, 47, 8)} END{if(n) print "cen_x", X/n, Y/n, Z/n}' \$pdbin\`
cat << EOF-latest >! ./o/latest.omac
sam_atom_in \`pwd\`/\$pdbin latest
mol latest
obj latest
zone ;
end
\$temp

paint_ramp atom_b ; blue red
obj Bfac
zone ;
end

sym_set ; ; $SG ; 
sym_cell

menu @visit_peaks.omac on
menu @map on
EOF-latest

if(! -e ./o/map) then
    cat << EOF-map >! ./o/map
map_cache
map_active_centre
map_file \`pwd\`/o/2fofc.omap
map_object sigmaa
map_parameter 15 15 15 1 tan 0.5 0 1
map_draw

map_cache
map_active_centre
map_file \`pwd\`/o/fofc.omap
map_object good
map_parameter 15 15 15 3 green 0.5 0 1
map_draw

map_cache
map_active_centre
map_file \`pwd\`/o/fofc.omap
map_object bad
map_parameter 15 15 15 -3 red 0.5 0 1
map_draw

EOF-map
endif

done:
# list the difference peaks
cat \${tempfile}peaklist |\\
nawk '/sigma/{printf "%s | ", \$0} /cen/{print}'
rm -f \${tempfile}peaklist >& /dev/null
echo "maps ready."
exit

EOF-fftscript
chmod a+x $fftscriptname













# unwrap the auto-converge script
cat << EOF-arpscript >! $convergescript
#! /bin/csh -f
#
#	Auto-convergeing refmac/arp engine
#
alias nawk $nawk

set pdbin      = $newpdbname
set mtzfile    = $mtzfile

setenv CCP4_SCR   ./temp
set tempfile   = \${CCP4_SCR}/refmactemp
#
set refmacscript = ./scripts/refmac.com
set arpscript    = ./scripts/arp.com
set fftscript    = $fftscriptname
set rmsdscript   = ./scripts/rmsd
#
# scan command line
foreach arg ( \$* )
    if(\$arg =~ *.mtz) set mtzfile = \$arg
    if(\$arg =~ *.pdb) set pdbin = \$arg
    if(\$arg =~ *.brk) set pdbin = \$arg
    if((\$arg =~ [1-9]*)&&(\$arg =~ *[0-9])) set cycle = \$arg
    if(\$arg == "noarp") set NO_ARP
    if(\$arg == nice)  renice -n 40 \$\$
end
#
# Cycle Numbers for running arp, or quitting 
set cycle     = 0
# number of refmac cycles to run between saving pdb
set savecycle = 10
# FFT will run every "fftcycle" refmac cycles (regardless of convergence)
set fftcycle  = 10
# ARP will run every "arpcycle" refmac cycles (regardless of convergence)
set arpcycle  = 1000
# this script will quit after "maxcycle" $refmac cycles
set maxcycle  = 1000

# get cycle number from input PDB filename
set temp = \`echo \$pdbin | nawk '{for(i=1;i<length(\$1);++i){ c=substr(\$1,i,1); if(c ~ /[0-9]/)printf c }}'\`
if("\$temp" != "") then
    set cycle = \$temp
endif
@ cycle = ( \$cycle + 1 )
#
#
#
# make needed directories
if(! -e maps)    mkdir maps
if(! -e logs)    mkdir logs
if(! -e pdb)     mkdir pdb
if(! -e temp)    mkdir temp

REFMAC:
###############################################################################
#
# refine the structure for \$SUPERNCYC steps
#
# reset convergence variables
set Rslope = 0.1
set RMSxyz = 0.1
set RMSDB  = 0.1
#
echo "refmac cycle \$cycle"
./\$refmacscript \$mtzfile \$pdbin >! logs/refmac\${cycle}.log
if(\$status) goto done
set pad_cycle = \`echo "\$cycle \$maxcycle" | nawk '{printf "%0"length(\$2)"d", \$1}'\`
mv pdb/refmac.pdb pdb/refmacin.pdb
if((\$cycle % \$savecycle) == 0) cp pdb/refmacin.pdb pdb/refmac\${pad_cycle}.pdb
#
# evaluate change in the model (rmsCA, rmsXYZ, rmsB, maxXYZ, maxB)
set temp = \`./\$rmsdscript xlog=on \$pdbin pdb/refmac\${pad_cycle}.pdb \`
if(\$#temp > 4) then
    set RMSxyz = "\$temp[2]"
    set RMSDB  = "\$temp[3]"
else
    echo "\$0: unable to determine model shifts! "
endif

# prepare for next cycle
set pdbin = pdb/refmacin.pdb

# check for maximum cycle reached
if(\$cycle > \$maxcycle) goto done

# if refinement is getting nowhere, go to ARP
if("\$RMSxyz" =~ 0.000*) set RUN_ARP
#if("\$RMSDB" =~ 0.0*) set RUN_ARP
#if(("\$RMSDB" =~ 0.0*)&&("\$RMSxyz" =~ 0.000*)) set RUN_ARP

if(\$?RUN_ARP) then
    echo "refmac converged! "
endif

# check cycle counts
if((\$cycle % \$arpcycle) == 0) set RUN_ARP
if((\$cycle % \$fftcycle) == 0) goto FFT

if(\$?RUN_ARP) then
    goto FFT
endif

# repeat cycles of refinement
@ cycle = ( \$cycle + 1 )

goto REFMAC




FFT:
###############################################################################
#
# make maps.
#
# defaults to ./maps/2fofc.map ./maps/fofc.map and ./maps/2fofc_free.map 
# plus ./o/2fofc.omap and ./o/fofc.omap 
#
echo "calculating maps"
./\$fftscript \$pdbin >! ./logs/fft.log
if(\$status) goto done

if(\$?RUN_ARP) then
    unset RUN_ARP
    goto ARP
endif

# if we get here, this was just an fft cycle

# repeat cycles of refinement
@ cycle = ( \$cycle + 1 )

goto REFMAC



ARP:
if(\$?NO_ARP) goto done
###############################################################################
#
#   use ARP to add and remove waters
#
echo "running arp"
./\$arpscript ./maps/2fofc_free.map ./maps/fofc.map \$pdbin >! logs/arpp\${cycle}.log
if(\$status) goto done
mv ./pdb/arp.pdb pdb/arpp\${cycle}.pdb

tail -3 logs/arpp\${cycle}.log | head -2

# handle convergence loop
set pdbin = pdb/arpp\${cycle}.pdb

@ cycle = ( \$cycle + 1 )

# edit maps.com if arp complains?

goto REFMAC


done:
###############################################################################
#
#   exiting message
#

echo "REFMAC/ARP stopped!  \\07 "
echo ""

exit

EOF-arpscript
chmod a+x $convergescript




















# now unwrap rmsd awk script
cat << EOF-RMSDSCRIPT >! $RMSD
#! $nawk -f
#
#   Calculate RMSD of atoms with the same name in two PDB files
#
#	The PDB feild:
#           |<--- here -->|
#ATOM      1  N   ALA A 327      40.574  34.523  43.012  1.00 34.04
#
#	is used to determine if two atoms are a "pair"
#
BEGIN {
if(! atom) atom = "CA"
maxXYZ = maxdB = 0
max_atom_XYZ = max_atom_dB = 0
maxXYZ_ID = maxdB_ID = "-"
max_atom_XYZ_ID = max_atom_dB_ID = "-"
}

/^ATOM/{
    # read in values (watching for duplications)
    ID = substr(\$0,12,15)
    ++count[ID]

    if(count[ID] == 1)
    {
	# not initialized yet
	X[ID] = substr(\$0, 31, 8)+0
	Y[ID] = substr(\$0, 39, 8)+0
	Z[ID] = substr(\$0, 47, 8)+0
	B[ID] = substr(\$0, 61, 6)+0
    }
    
    if(count[ID] == 2)
    {
	++pairs
    
	# seen this before, subtract values
	dX     = X[ID] - substr(\$0, 31, 8)
	dY     = Y[ID] - substr(\$0, 39, 8)
	dZ     = Z[ID] - substr(\$0, 47, 8)
	dB[ID] = B[ID] - substr(\$0, 61, 6)
	
	# get drift (and add up squares of drifts)
	sqrD   = dX*dX + dY*dY + dZ*dZ
	dXYZ[ID] = sqrt(sqrD)

	# remember maximum shifts
	if(dXYZ[ID] > maxXYZ) {maxXYZ  = dXYZ[ID]; maxXYZ_ID = ID }
	if(dB[ID]*dB[ID] > maxdB*maxdB) {maxdB = dB[ID]; maxdB_ID = ID }

	# maintain mean-square sums	
	sumXYZ += sqrD
	sumB   += dB[ID]*dB[ID]

	# separate stats for special atom type
	if(ID ~ atom)
	{
	    ++atom_pairs

	    # maintain separate mean-square sums
	    sum_atom_XYZ += sqrD
	    sum_atom_B   += dB[ID]*dB[ID]
	    
	    # remember maximum drifts too
	    if(dXYZ[ID] > max_atom_XYZ) {max_atom_XYZ  = dXYZ[ID]; max_atom_XYZ_ID = ID }
	    if(dB[ID]*dB[ID] > max_atom_dB*max_atom_dB) {max_atom_dB = dB[ID]; max_atom_dB_ID = ID }	    
	}
	# debug output
	if(debug)
	{
	    printf("%s moved %8.4f (XYZ) %6.2f (B)\\n", ID, dXYZ[ID], dB[ID])
	}	
    }

    if(count[ID] > 2)
    {
	print "WARNING: " ID " appeared more than twice! "
    }
}


END{
    
    if((pairs+0 == 0)&&(! xlog)) 
    {
	print "no atom pairs found"
	exit
    }
    rmsXYZ = sqrt(sumXYZ/pairs)
    rmsB = sqrt(sumB/pairs)
    if(atom_pairs+0 != 0)
    {
	rms_atom_XYZ = sqrt(sum_atom_XYZ/atom_pairs)
	rms_atom_B = sqrt(sum_atom_B/atom_pairs)
    }
    

    if(! xlog) 
    {
	print pairs " atom pairs found"
	print "RMSD("atom" )= " rms_atom_XYZ " ("atom_pairs, atom " pairs)"
	print "RMSD(all)= " rmsXYZ " ("pairs" atom pairs)"
	print "RMSD(Bfac)= " rmsB

	print "MAXD(all)= " maxXYZ "\\tfor " maxXYZ_ID
	print "MAXD(Bfac)= " maxdB "\\tfor " maxdB_ID
	
	# final check for orphan atoms 
	for(id in count)
	{
	    if(count[id]<2) print "WARNING: " id " only found once"
	}
    }
    else
    {
	printf "%10.8f %10.8f %10.5f %10.8f %8.2f \\n", rms_atom_XYZ, rmsXYZ, rmsB, maxXYZ, maxdB
    }
}

EOF-RMSDSCRIPT
chmod a+x $RMSD

















# unwrap analysis scripts
cat << EOF-Rplot >! ./scripts/Rplot.com
#! /bin/csh -f
#
#   Plot R-factors from a REFMAC log in xloggraph format
#
#
alias nawk /usr/bin/nawk
if(\`uname\` == Linux) alias nawk awk
if(! \$?CBIN) set CBIN = "/programs/ccp4/bin"

set logs
set sort = rt
foreach arg ( \$* )
    if(\$arg =~ paste*) then
	set sort = ""
    endif
end
echo ""
ls -1\$sort \$*
set i = 1
echo ""
echo "<html><pre>Stats"
echo '<applet width=" 700" height=" 300" code="JLogGraph.class" codebase="'\$CBIN'"><param name="table" value="'
echo ' \$TABLE : Stats:'
echo ' \$GRAPHS:R factors:A:1, 2, 3: '
echo ':R diff:A:1, 4: '
echo ':R cryst:A:1, 2: '
echo ':R free:A:1, 3: '
echo ':rms bond length deviation:N:1, 5: '
echo ':rms angle deviation:N:1, 6: '
echo ':number of atoms:A:1, 7: \$\$ '
onintr finish
if(\$#argv == 1) then
    echo 'cycle Rcryst  Rfree  Rfree-Rcrys  bonds angles  atoms \$\$ '
    echo '\$\$ '
    # do refinement cycles
    cat \$1 |\\
    nawk '/Number of atoms read/{atoms = \$NF} \\
	/Estimated bond angle/{angles=\$NF}  /NUMBER   DIST   DELTA/{getline; bonds=\$5} \\
	/Bond distances: refined atoms/{bonds=\$6} \\
	/Bond angles  : refined atoms/{angles=\$7} \\
	/Number of atoms       /{atoms=\$NF} \\
	/weighted/ || /based on / {next}\\
	/all[_ ]R[_ ]factor/{R = \$NF; ++i} \\
	/ree[_ ]R[_ ]factor/{RF = \$NF} \\
	/Overall figure of merit/ || /Precision indication from R values/{fulline=1}\\
	fulline{fulline=0;\\
	   printf "%-5d %6.3f %6.3f %6.3f       %6.3f %4.1f  %6d \\n",  i, R*100, RF*100, 100*(RF-R), bonds, angles, atoms}'
else
    echo 'pdb  Rcryst  Rfree  Rfree-Rcrys  bonds angles  atoms \$\$ '
    echo '\$\$ '
    foreach X ( \`ls -1\$sort \$* \` )
	set number = \`echo \$X | nawk '{for(i=1;i<length(\$1);++i){ c=substr(\$1,i,1); if(c ~ /[0-9]/)printf c }}'\`

	# speed things up a bit
	set atoms = \`nawk '/Number of atoms read/ || /Number of atoms       /{print \$NF; exit}' \$X\`
	if("\$atoms" == "") continue
	tail -4000 \$X |\\
	nawk -v number=\$number -v atoms=\$atoms '/all[_ ]R[_ ]factor/{R = \$NF} /ree[_ ]R[_ ]factor/{RF = \$NF} \\
             /Estimated bond angle/{angles=\$NF}  /NUMBER   DIST   DELTA/{getline; bonds=\$5} \\
	     /Bond distances: refined atoms/{bonds=\$6} \\
	     /Bond angles  : refined atoms/{angles=\$7} \\
	    END{if(R=="") exit; \\
	    printf "%-5d %6.3f %6.3f %6.3f      %6.3f %4.1f  %6d \\n", number, R*100, RF*100, (RF-R)*100, bonds, angles, atoms}' 
    end
endif

finish:
echo '\$\$'
echo '"><b>For inline graphs use a Java browser</b></applet>'
echo "</pre></html>"
EOF-Rplot
chmod a+x ./scripts/Rplot.com




























cat << EOF-Drift >! ./scripts/Drift.com
#! /bin/csh -f
#
#   Plot "drift" of a series of PDB files, relative to each other
#
#goto skip
set nawk = $nawk
\$nawk 'BEGIN{exit}' >& /dev/null
if(\$status) set nawk = \`which awk\`

alias nawk \$nawk

if(! \$?CBIN) set CBIN = "/programs/ccp4/bin"

set refcycles = 10
set rmsd      = ./scripts/rmsd

if(\$#argv == 1) then

endif

plot:
echo "<html><pre>Steps"
echo '<applet width=" 700" height=" 300" code="JLogGraph.class" codebase="'\$CBIN'"><param name="table" value="'
echo ' \$TABLE : Steps:'
echo ' \$GRAPHS:RMSDs:A:1, 2, 3, 4: '
echo ':RMS CA:A:1, 2: '
echo ':RMS xyz:A:1, 3: '
echo ':MAX xyz:A:1, 5: '
echo ':RMS B:A:1, 4: '
echo ':MAX B:A:1, 6: \$\$ '
echo 'pdb  RMSD_CA    RMSD_xyz      RMSD_B  MAX_xyz        MAX_B \$\$ '
echo '\$\$ '
set count = 0
foreach X ( \`ls -1rt \$* \` )
    # keep track of number of PDBs processed
    # get number of current file
    set number = \`echo \$X | nawk '{for(i=1;i<length(\$1);++i){ c=substr(\$1,i,1); if(c ~ /[0-9]/)printf c }}'\`
    # synchronize internal count with number from file
    if("\$number" == "") then
	set number = \$count
    else
	set count = \$number
    endif
    # print it out
    echo \$number | nawk '{printf "%-5d", \$1}'
    # now get name of the "previous" pdb file
    if(! \$?lastpdb) set lastpdb = \$X
    
    cat \$lastpdb \$X | grep -v WAT | \$rmsd xlog=on
    # now the "current" file is the "last" file for the next pdb
    set lastpdb = \$X
    @ count = ( \$count + 1 )
end
echo '\$\$'
echo '"><b>For inline graphs use a Java browser</b></applet>'

skip:
# differences from first file in list
set ref = \`ls -1rt \$* | head -1\`

echo 'Diffs from '\$ref':'
echo '<applet width=" 700" height=" 300" code="JLogGraph.class" codebase="'\$CBIN'"><param name="table" value="'
echo ' \$TABLE : Diffs from '\$ref':'
echo ' \$GRAPHS:RMSDs:A:1, 2, 3, 4: '
echo ':RMS CA:A:1, 2: '
echo ':RMS xyz:A:1, 3: '
echo ':MAX xyz:A:1, 5: '
echo ':RMS B:A:1, 4: '
echo ':MAX B:A:1, 6: \$\$ '
echo 'pdb  RMSD_CA    RMSD_xyz      RMSD_B  MAX_xyz        MAX_B \$\$ '
echo '\$\$ '
set count = 0
foreach X ( \`ls -1rt \$* \` )
    # try to get number from the filename
    set number = \`echo \$X | nawk '{for(i=1;i<length(\$1);++i){ c=substr(\$1,i,1); if(c ~ /[0-9]/)printf c }}'\`
    # otherwise, synchronize internal count with number from file
    if("\$number" == "") then
	set number = \$count
    else
	set count = \$number
    endif
    # print it out
    echo \$number | nawk '{printf "%-5d", \$1}'
    cat \$ref \$X | grep -v WAT | \$rmsd xlog=on
    @ count = ( \$count + 1 )
end
echo '\$\$'
echo '"><b>For inline graphs use a Java browser</b></applet>'


# diffs from LAST pdb in the list

set ref = \`ls -1rt \$* | tail -1\`

echo 'Diffs from '\$ref':'
echo '<applet width=" 700" height=" 300" code="JLogGraph.class" codebase="'\$CBIN'"><param name="table" value="'
echo ' \$TABLE : Diffs from '\$ref':'
echo ' \$GRAPHS:RMSDs:A:1, 2, 3, 4: '
echo ':RMS CA:A:1, 2: '
echo ':RMS xyz:A:1, 3: '
echo ':MAX xyz:A:1, 5: '
echo ':RMS B:A:1, 4: '
echo ':MAX B:A:1, 6: \$\$ '
echo 'pdb  RMSD_CA    RMSD_xyz      RMSD_B  MAX_xyz        MAX_B \$\$ '
echo '\$\$ '
set count = 0
foreach X ( \`ls -1rt \$* \` )
    # try to get number from the filename
    set number = \`echo \$X | nawk '{for(i=1;i<length(\$1);++i){ c=substr(\$1,i,1); if(c ~ /[0-9]/)printf c }}'\`
    # otherwise, synchronize internal count with number from file
    if("\$number" == "") then
	set number = \$count
    else
	set count = \$number
    endif
    # print it out
    echo \$number | nawk '{printf "%-5d", \$1}'
    cat \$ref \$X | grep -v WAT | \$rmsd xlog=on
    @ count = ( \$count + 1 )
end
echo '\$\$'
echo '"><b>For inline graphs use a Java browser</b></applet>'

if(-e ./rmsd\$\$) rm -f ./rmsd\$\$
echo "</pre></html>"
exit

EOF-Drift
chmod a+x ./scripts/Drift.com


echo "created suitable refmac script as $scriptname"
echo "created suitable fft script as    $fftscriptname"
echo "created suitable arp script as    $arpscriptname"
echo "$RMSD can be used to get the rmsd between two pdbs"
echo "./scripts/Drift.com will plot the drift of a series of pdbs"
echo "./scripts/Rplot.com will plot the R-factors of refmac logs"
echo "$convergescript will iteratively run all the above scripts"
exit


































Unwrap_awk_Scripts:

cat << EOF-mtzstuff >! ${tempfile}mtzstuff.awk
#! $nawk -f
#
#
#	Organize info from an mtzdump in more accessible format
#
#
#
# resolution limits
/Resolution Range/ {
    getline; getline;
    hires = \$6;
    lores = \$4;
}

# cell
/Cell Dimensions/ {
    getline; getline;
    cell = \$0
}

# space group
/Space group/{
    gsub("\\047","");
    SG = \$5
}

/Column Labels/{
    getline; getline;
    while(NF>0)
    {
	for(i=1;i<=NF;++i)
	{
	    ++labels;
	    label[labels] = \$i
	}
	getline
    }
}

/Column Types/{
    getline; getline;
    while(NF>0)
    {
	for(i=1;i<=NF;++i)
	{
	    ++t;
	    type[t] = \$i
	}
	getline
    }
}

/OVERALL FILE STATISTICS/,/LIST OF REFLECTIONS/{
    for(l in label)
    {
	if(\$NF == label[l])
	{
	    # retrieve interesting numbers from the summary list
	    mean[l] = \$(NF-4)+0
	    completeness[l] = substr(\$0, 32)+0
	}
    }
}



END {
    # now print everything out
    print "CELL", cell
    print "SYMM", SG
    print "RESO", lores, hires
    for(l=1;l<=labels;++l)
    {
	printf "COL %-15s %s %6.2f %6.2f%%\\n", label[l], type[l], mean[l], completeness[l]
    }
    
    # now try to equate Fs and DANOs with their sigmas
    for(l=1;l<=labels;++l)
    {
	# structure factors
	if(type[l]=="F")
	{
	    ++fs; 
	    F[fs]=label[l];
	    meanF[F[fs]]=mean[l];

	    last = F[fs];
	}
	
	# anomalous differences (in F units)
	if(type[l]=="D")
	{
	    ++ds; 
	    D[ds]=label[l];
	    meanD[D[ds]]=mean[l];

	    last = D[ds];
	}
	
	# sigmas (of anything)
	if(type[l]=="Q")
	{
	    ++ss;
	    Q[ss]=label[l];
	    meanQ[Q[ss]]=mean[l]; 
	    
	    # putatively assign sigmas to most recent F (almost always right)
	    sigma[last]=Q[ss];
	}
    }

    # now go see if any sigmas have nearly identical names with an F or DANO
    for(s=1;s<=ss;++s)
    {
	# run down all Fs
	for(f=1;f<=fs;++f)
	{   
	    # look for SIG(name) to match name
	    if(Q[s] == "SIG" F[f])
	    {
		sigma[F[f]]=Q[s];
	    }
	}
	
	# same for DANOs
	for(d=1;d<=ds;++d)
	{
	    if(Q[s] == "SIG" D[d])
	    {
		sigma[D[d]]=Q[s];
	    }
	}
    }
    
    # now print out putative pairs
    for(f=1;f<=fs;++f)
    {
	if(meanQ[sigma[F[f]]]!=0)
	{
	    meanF[F[f]]=meanF[F[f]]/meanQ[sigma[F[f]]];
	}
	else
	{
	    meanF[F[f]]="";
	}
      
	printf "F: %-10s %-10s %-10s\\n", F[f], sigma[F[f]], meanF[F[f]];
    }
    for(d=1;d<=ds;++d)
    {
	if(meanQ[sigma[D[d]]]!=0)
	{
	    meanD[D[d]]=meanD[D[d]]/meanQ[sigma[D[d]]];
	}
	else
	{
	    meanD[D[d]]="";
	}
      
	printf "D: %-10s %-10s %-10s\\n", D[d], sigma[D[d]], meanD[D[d]];
    }
}
EOF-mtzstuff
chmod a+x ${tempfile}mtzstuff.awk


cat << EOF-reformatpdb >! ${tempfile}reformatpdb.awk
#! $nawk -f
#
#
# Re-writes PDB into something more canonnical
# 
BEGIN {

# memorize ordering of the alphabet
alphabet[1]="A"; alphabet[2]="B"; alphabet[3]="C"; alphabet[4]="D"; alphabet[5]="E"; alphabet[6]="F"; alphabet[7]="G"; alphabet[8]="H"; alphabet[9]="I"; alphabet[10]="J"; alphabet[11]="K"; alphabet[12]="L"; alphabet[13]="M"; alphabet[14]="N"; alphabet[15]="O"; alphabet[16]="P"; alphabet[17]="Q"; alphabet[18]="R"; alphabet[19]="S"; alphabet[20]="T"; alphabet[21]="U"; alphabet[22]="V"; alphabet[23]="W"; alphabet[24]="X"; alphabet[25]="Y"; alphabet[26]="Z"
alphabet["A"]=1; alphabet["B"]=2; alphabet["C"]=3; alphabet["D"]=4; alphabet["E"]=5; alphabet["F"]=6; alphabet["G"]=7; alphabet["H"]=8; alphabet["I"]=9; alphabet["J"]=10; alphabet["K"]=11; alphabet["L"]=12; alphabet["M"]=13; alphabet["N"]=14; alphabet["O"]=15; alphabet["P"]=16; alphabet["Q"]=17; alphabet["R"]=18; alphabet["S"]=19; alphabet["T"]=20; alphabet["U"]=21; alphabet["V"]=22; alphabet["W"]=23; alphabet["X"]=24; alphabet["Y"]=25; alphabet["Z"]=26
alphabet[0]=" "; alphabet[" "]=0;

# canonical ordering of atom types in PDB
align[1]  = "N"  ; align[2]  = "A " ; align[3]  = "C"  ; align[4]  = "O"  ;
align[5]  = "B " ; align[6]  = "G " ; align[7]  = "G1" ; align[8]  = "G2" ; 
align[9]  = "G3" ; align[10] = "D " ; align[11] = "D1" ; align[12] = "D2" ; 
align[13] = "D3" ; align[14] = "E " ; align[15] = "E1" ; align[16] = "E2" ;
align[17] = "E3" ; align[18] = "Z " ; align[19] = "Z1" ; align[20] = "Z2" ;
align[21] = "Z3" ; align[22] = "H " ; align[23] = "H1" ; align[24] = "H2" ;
align[25] = "H3" ;

}

/^ATOM/ || /^HETATM/ {

    if(debug) print tolower(\$0)

#######################################################################################
    electrons = substr(\$0, 67,6)	# number of electrons in this atom (not always there)
    XPLORSegid = substr(\$0, 73, 4)    	# XPLOR-style segment ID
    split(XPLORSegid, a)		# (remove spaces)
    XPLORSegid = a[1];
    Element = substr(\$0, 67)		# sometimes element is given here

    Atomnum= substr(\$0,  7, 5)+0	# atom number
    Element= substr(\$0, 13, 2);		# actual element number
    Greek= substr(\$0, 15, 2);		# "remoteness" number of this atom (i.e. "A" for C-alpha)
    split(Element Greek, a)		# (remove spaces)
    Atom   = a[1];			# store whole atom name
    Conf   = substr(\$0, 17, 1)		# conformer letter
    Restyp = substr(\$0, 18, 3)		# residue name
    Segid  = substr(\$0, 22, 1)    	# O/Brookhaven-style segment ID
    Resnum = substr(\$0, 23, 4)		# residue number
    X      = substr(\$0, 31, 8)+0	# coordinates
    Y      = substr(\$0, 39, 8)+0
    Z      = substr(\$0, 47, 8)+0
    Occ    = substr(\$0, 55, 6)+0	# occupancy
    Bfac   = substr(\$0, 61, 6)+0	# B-factor
#   rest   = substr(\$0, 67)		# rest of the line after B-factor?
    ATOM   = toupper(substr(\$0, 1, 6))	# store given atom name
    ID     = substr(\$0,12,15)

#######################################################################################
#   correct for alternate formatting

    if((Segid == " ") && (substr(XPLORSegid,1,1) ~ /[A-Z]/))
    {
	Segid = substr(XPLORSegid,1,1)
    }
    if(Resnum ~ /[A-Z]/)
    {
	# incorrect residue numbers: A14, etc.
	Segid = substr(Resnum,match(Resnum,"[A-Z]"),1);
	Resnum = substr(Resnum,match(Resnum,"[A-Z]")+1);
    }
    Resnum+=0;

    # fix X-plor's non-standard MET
    if(Restyp == "MSE")
    {
#	Restyp = "MET"
	if((Greek == "E ")&&(Element == " S"))
	{
	    Element = "SE"
	    Greek = "D "
	}
    }

    if(electrons+0 == 0)
    {
	if(Element == " C") electrons = 6
	if(Element == " O") electrons = 8
	if(Element == " N") electrons = 7
	if(Element == " H") electrons = 1
	if(Element == " S") electrons = 16
	if(Element == "SE") electrons = 34
    }

#######################################################################################
# user-directed globlal changes
    if(BFAC != "") 
    {
        if(BFAC !~ /^[+-]/) Bfac = 0
        Bfac += BFAC
    }
    if(OCC != "")
    {
        if(OCC !~ /^[+-]/) Occ = 0
        Occ += OCC
    }
    if(CHAIN != "") Segid = CHAIN
    if(renumber)
    {
	++ATOMNUM
	Atomnum = ATOMNUM
    }
    # increment/decrement Segids?
    if(map[Segid]=="") map[Segid]=Segid
    ID = substr(ID,1,10) map[Segid] substr(ID,12)
    seen[ID]=seen[ID]+1
    while((seen[ID]>1)&&(Segid != "Z")&&(1))
    {
	map[Segid] = alphabet[alphabet[map[Segid]]+1]
	ID = substr(ID,1,10) map[Segid] substr(ID,12)
	seen[ID]=seen[ID]+1
    }
    Segid=map[Segid]

#######################################################################################
    if(Resnum != lastResnum)
    {
	for(i=1;i<30;++i)
	{
	    if((residue[align[i]] != "")&&(reorder))
	    {
		print residue[align[i]]
	    }
	}
	# clear for next time
	for(x in residue) residue[x] = ""
    }
    lastResnum = Resnum
    
    order = Greek
    if(order == "  ") order = Atom
    residue[order] = sprintf("%6s%5d %2s%-2s%1s%3s %1s%4d     %7.3f %7.3f %7.3f %5.2f%6.2f%4d  %-4s%2s",\\
        ATOM, Atomnum,Element,Greek,Conf,Restyp,Segid,Resnum,X,Y,Z,Occ,Bfac,electrons,XPLORSegid,Element);
    
#######################################################################################
    # default, non-reordering mode
    if(!reorder)
    {
	printf("%6s%5d %2s%-2s%1s%3s %1s%4d     %7.3f %7.3f %7.3f %5.2f%6.2f%4d  %-4s%2s\\n",ATOM, Atomnum,Element,Greek,Conf,Restyp,Segid,Resnum,X,Y,Z,Occ,Bfac,electrons,XPLORSegid,Element);	
    }
}

! /^ATOM/ && ! /^HETATM/

END{
	for(i=1;i<30;++i)
	{
	    if((residue[align[i]] != "")&&(reorder))
	    {
		print residue[align[i]]
	    }
	}    
}
EOF-reformatpdb
chmod a+x ${tempfile}reformatpdb.awk








cat << EOF-phipsi >! ${tempfile}phipsi.awk
#! $nawk -f
#
#	Calculate phi-psi angles for a pdb file
#
#	
#
BEGIN{
    # put "0th" CA in outer space
    CA["X"]=999999999;
    CA["Y"]=999999999;
    CA["Z"]=999999999;
}

/^ATOM/{
    Element= substr(\$0, 13, 2);
    Greek= substr(\$0, 15, 2);
    split(Element Greek, a)
    Atom   = a[1];
    Restyp = substr(\$0, 18, 3)
    Segid  = substr(\$0, 22, 1)    	# O/Brookhaven-style segment ID
    Resnum = substr(\$0, 23, 4)+0
    X      = substr(\$0, 31, 8)+0
    Y      = substr(\$0, 39, 8)+0
    Z      = substr(\$0, 47, 8)+0

    if(Segid == " ") Segid = "_"

    if((Atom == "CA")||(Atom == "CT2"))
    {
	# store XYZ of last 2 Ca atoms
	lastCA["X"] = CA["X"]; lastCA["Y"] = CA["Y"]; lastCA["Z"] = CA["Z"]; 
	CA["X"] = X; CA["Y"] = Y; CA["Z"] = Z;
	# flag to know when we're in "real" protein
	one_CA_read = 1
    }
    if((Atom == "C")||(Atom == "CT1"))
    {
	# store XYZ of last 2 carbonyl carbons
	lastC["X"] = C["X"]; lastC["Y"] = C["Y"]; lastC["Z"] = C["Z"]; 
	C["X"] = X;  C["Y"] = Y;  C["Z"] = Z; 
    }
    if((Atom == "N" )||(Atom == "OT"))
    {
	# store XYZ of last 2 amino nitrogens
	N["X"] = nextN["X"]; N["Y"] = nextN["Y"]; N["Z"] = nextN["Z"]; 
	nextN["X"] = X;  nextN["Y"] = Y;  nextN["Z"] = Z;
    }
    
    # print phi-psi when we reach the N of the NEXT residue
    if((Atom == "N") && (one_CA_read))
    {
	# don't print out unrealistic CA-CA distances
	CA_CA = sqrt((lastCA["X"]-CA["X"])^2 + (lastCA["Y"]-CA["Y"])^2 + (lastCA["Z"]-CA["Z"])^2)
	# don't print out unrealistic psi distances either
	CA_nN = sqrt((CA["X"]-nextN["X"])^2 + (CA["Y"]-nextN["Y"])^2 + (CA["Z"]-nextN["Z"])^2)
	
	phi=psi=0;
	if(CA_CA < 10)
	{
	    phi = dihedral(lastC, N, CA, C);
	}	
	if(CA_nN < 10)
	{
	    psi = dihedral(N, CA, C, nextN);
	}
	pep = dihedral(lastCA, lastC, N, CA);
	
	# print residue phi and psi:
	if((CA_CA < 10)||(CA_nN < 10))
	{
	    printf "%3s %s %-4d phi= %7.2f   psi= %7.2f   pep= %7.2f\n",\
		 lastRestyp, lastSegid, lastResnum,phi,psi,pep;
	}
    }
    lastResnum = Resnum
    lastRestyp = Restyp
    lastSegid  = Segid
    lastX = X
    lastY = Y
    lastZ = Z
}
END{print ""}



function dihedral(atom1,atom2,atom3,atom4) {
#
#     O -atom1                     O - atom4
#      \\                          /
#       \\                        /
#        \\                      /
#         \\                    /
#          \\      chi = 0     /
#    atom2- O -------------- O -atom3
#
# atom1, atom2, atom3, atom4, are 3-membered arrays ["X","Y","Z"]
# return value is chi
#
    
    # we need to construct a basis for converting them to "global" coordinates
    
    # get vector of first dihedral bond
    bond1["X"] = atom1["X"]-atom2["X"]
    bond1["Y"] = atom1["Y"]-atom2["Y"]
    bond1["Z"] = atom1["Z"]-atom2["Z"]
    
    # get vector of "second" (rotating axis) bond
    axis["X"]  = atom3["X"]-atom2["X"]
    axis["Y"]  = atom3["Y"]-atom2["Y"]
    axis["Z"]  = atom3["Z"]-atom2["Z"]

    # get vector of "third" dihedral bond
    bond3["X"] = atom4["X"]-atom3["X"]
    bond3["Y"] = atom4["Y"]-atom3["Y"]
    bond3["Z"] = atom4["Z"]-atom3["Z"]
    
    # normalize the "axis" to unit length
    norm = sqrt(axis["X"]^2 + axis["Y"]^2 + axis["Z"]^2)
    if(norm == 0) return "axis error"
    axis["X"] = axis["X"]/norm
    axis["Y"] = axis["Y"]/norm
    axis["Z"] = axis["Z"]/norm
    
    # reduce "bond" to their components perpendicular to the "axis"
    component  = bond1["X"]*axis["X"] + bond1["Y"]*axis["Y"] + bond1["Z"]*axis["Z"]
    bond1["X"] = bond1["X"]-component*axis["X"]
    bond1["Y"] = bond1["Y"]-component*axis["Y"]
    bond1["Z"] = bond1["Z"]-component*axis["Z"]
    
    component  = bond3["X"]*axis["X"] + bond3["Y"]*axis["Y"] + bond3["Z"]*axis["Z"]
    bond3["X"] = bond3["X"]-component*axis["X"]
    bond3["Y"] = bond3["Y"]-component*axis["Y"]
    bond3["Z"] = bond3["Z"]-component*axis["Z"]
    
    
    # now the angle between bond1 and bond3 is the dihedral angle
    

    # normalize the first and last bond vectors
    norm = sqrt(bond1["X"]^2 + bond1["Y"]^2 + bond1["Z"]^2)
    if(norm == 0) return "bond1 error"
    bond1["X"] = bond1["X"]/norm
    bond1["Y"] = bond1["Y"]/norm
    bond1["Z"] = bond1["Z"]/norm
    
    norm = sqrt(bond3["X"]^2 + bond3["Y"]^2 + bond3["Z"]^2)
    if(norm == 0) return "bond3 error"
    bond3["X"] = bond3["X"]/norm
    bond3["Y"] = bond3["Y"]/norm
    bond3["Z"] = bond3["Z"]/norm


    # construct a vector perpendicular to both the axis and bond1
    # (this differentiates "sides" of the dihedral)
    chi90["X"] = axis["Y"] * bond1["Z"] - axis["Z"] * bond1["Y"];
    chi90["Y"] = axis["Z"] * bond1["X"] - axis["X"] * bond1["Z"];
    chi90["Z"] = axis["X"] * bond1["Y"] - axis["Y"] * bond1["X"];
    
    
    # get the component of bond3 along bond1
    adjacent = bond1["X"]*bond3["X"] + bond1["Y"]*bond3["Y"] + bond1["Z"]*bond3["Z"]
    
    # get the component of bond3 along bond1
    opposite = chi90["X"]*bond3["X"] + chi90["Y"]*bond3["Y"] + chi90["Z"]*bond3["Z"]
    
    # use ArcTan to get the angle
    angle = atan2(opposite, adjacent)*180/3.1415927;
    
    return angle
}

EOF-phipsi
chmod a+x ${tempfile}phipsi.awk


goto RetrnUnwrap_awk_Scripts


########################

The Future:

recognize metals as IUM
recognize NCS by least-squares alignment? 
edit dictionary for SeMET
jam.com and traj.com
Canned omit-map procedure: omit.com




