#! /bin/csh -f # # Recursive RSPS # # For completely brute-force heavy atom searching :) # # courtesy of the Phaser Elves # # set this to wherever your awk program is set nawk = nawk $nawk 'BEGIN{print}' >& /dev/null if($status) set nawk = awk alias nawk $nawk set SIGMA_CUTOFF = 3 set CLOSE_peaks = 0.5 set MAX_SITES = 30 set MAXPEAKS = 300 set outfile = "./rrsps.sites" set mapfiles = "" set SGs = "" set mapfile = "" set SG = "" set fixxyzfile = "" set tempfile = ./rrsps_tempfile set bestscore = 0 set SYMOP goto Setup ############################################################# # # scan command line for map filename, space group(s) # and "fixed" site list file # Help: cat << EOF-Help usage: $0 patt.map P212121 3 sigma 30 sites where: patt.map - is the Patterson map you want to solve (CCP4 format) P212121 - is the space group you want to try 3 sigma - only consider harker peaks > 3*sigma 30 sites - maximum number of sites you expect to find Note: you CAN specify more than one space group, and/or more than one map file. Each will be considered in turn. EOF-Help goto Cleanup_RSPS ############################################################# ReturnFrom_Setup: # this is the tricky one, this file MUST NOT be # overwritten by the child processes! if(! $?RRSPS_DEPTH) then # this must be the first instance setenv RRSPS_DEPTH 0 # do multi-spacegroup run foreach mapfile ( $mapfiles ) echo "using $mapfile" foreach SG ( $SGs ) # recurse #echo "trying $SG" # initialize the cumulative files echo -n "" >! ${tempfile}_${SG}_biglist echo -n "" >! ${outfile}_${SG} # recurse $0 $SG $mapfile $PASSALONG end end # we are done goto Cleanup_RSPS endif # if we reach this point # there is only one SG and one map set mapfile = "$mapfiles" set SG = "$SGs" # this variable keeps rrsps child runs from overwriting # their parent job's files @ temp = ( $RRSPS_DEPTH + 1 ) setenv RRSPS_DEPTH $temp set unique = "${tempfile}_$RRSPS_DEPTH" # skip over harker procedure on all but first instance if(-e "$fixxyzfile") goto cross_scan harker_scan: ############################################################# # kick-start heavy atom search with a harker scan echo -n "$SG harker scan... " rsps $SYMOP << EOF-harker >! ${tempfile}.log spacegroup $SG patfile $mapfile scorfile ${tempfile}rsps.map mode harker reject 0 scan au pick scoremap $MAXPEAKS EOF-harker grep ERROR ${tempfile}.log if(! $status) then echo "RSPS failed! see rsps.error.log to find out why." mv ${tempfile}.log rsps.error.log goto Cleanup_RSPS endif # convert any peaks > sigma cutoff to new, potential sites cat ${tempfile}.log |\ nawk 'BEGIN{rms=999999} \ /Rms deviation from mean/ && $NF+0 != 0{rms=$NF+0}\ /ANGSTROM COORDINATES/{table=1} \ NF>7 && ! /[A-Za-z]/{print $2, $3, $4, $5, $6, $7, $8/rms}' |\ nawk -v cutoff=$SIGMA_CUTOFF '$NF > cutoff' |\ sort -nr +6 >! ${unique}_newsites #cat >! ${unique}_newsites rm -f ${tempfile}.log >& /dev/null rm -f ${tempfile}rsps.map >& /dev/null set hits = `cat ${unique}_newsites | wc -l` set temp = `tail -1 ${unique}_newsites | nawk '{printf "%.2f", $NF}'` echo "$hits hits > $temp sigma" if(($hits == 0)||("$SG" == "P1")) then # woops! no harker vectors! echo "0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 $SIGMA_CUTOFF" >! ${unique}_newsites endif # now act like we just found this in a "regular" (cross) scan set fixxyzfile = ${tempfile}.empty touch $fixxyzfile goto Recurse cross_scan: ############################################################# # look for new sites, given that we already have # the sites listed in $fixxyzfile # tell RSPS to scan ASU for sites with cross-vectors to $fixxyzfile cat << EOF-cross >! ${tempfile}rsps.in spacegroup $SG patfile $mapfile scorfile ${tempfile}rsps.map mode cross #reject 0 EOF-cross cat $fixxyzfile |\ nawk '{print "fixxyz", $1, $2, $3}' >> ${tempfile}rsps.in # also get the "harker score", and avoid crossvector search peaks # that don't have Harker peaks cat << EOF-cross2 >> ${tempfile}rsps.in scan au pick scoremap $MAXPEAKS mode harker vlist site 1 $MAXPEAKS EOF-cross2 # run rsps and convert any peaks > sigma cutoff to new, potential sites cat ${tempfile}rsps.in | rsps |\ nawk 'BEGIN{rms=999999} \ /Rms deviation from mean/ && $NF+0 != 0{rms=$NF+0}\ /ANGSTROM COORDINATES/{table=1} \ $1=="SCORE"{++n; print n, $3} \ NF>7 && ! /[A-Za-z]/{print $2, $3, $4, $5, $6, $7}' |\ nawk -v cutoff=$SIGMA_CUTOFF '\ NF!=2{++n;line[n]=$0}\ NF==2 && $2+0>cutoff{print line[$1], $2}' |\ sort -nr +6 >! ${unique}_newsites #cat >! ${unique}_newsites rm -f ${tempfile}rsps.in >& /dev/null rm -f ${tempfile}rsps.map >& /dev/null # filter out the fixxyz sites, and their symm mates # generate all symmetry mates of fixed sites cat $fixxyzfile |\ nawk -v SG=$SG -v CELL="$CELL" '\ BEGIN{print "SYMM", SG; print "CELL", CELL;\ print "XYZLIM -0.1 1.1 -0.1 1.1 -0.1 1.1"} \ {print "ATOM X", $1, $2, $3}' |\ gensym |\ nawk '/List of sites/,/Normal termination/' |\ nawk '$2 ~ /[01].[0-9][0-9][0-9]/ {print $5, $6, $7, "fixed"}' |\ cat >! ${tempfile}fixxyz cat ${tempfile}fixxyz ${unique}_newsites |\ nawk -v cut=$CLOSE_peaks ' \ $NF=="fixed"{++n; X[n]=$1; Y[n]=$2; Z[n]=$3} \ $NF!="fixed"{minD=999999; \ # find nearest "old" site \ for(i=1;i<=n;++i){\ dist=sqrt(($4-X[i])^2 +($5-Y[i])^2 +($6-Z[i])^2);\ if(dist < minD){\ minD=dist;}}\ # now see if it is too close \ if(minD > cut) {print}}' |\ cat >! ${tempfile} mv ${tempfile} ${unique}_newsites # now represent each "new site" as it's true, full constellation # of cross-vector related peaks # label the "fixxyz" sites, and their scores cat $fixxyzfile |\ nawk '{print $1, $2, $3, $NF, "fixed"}' >! ${tempfile}fixxyz # explode the list nawk 'NF>3' ${tempfile}fixxyz ${unique}_newsites |\ nawk -v SG=$SG -v file=$mapfile 'BEGIN{ bigscore=1;} \ $NF=="fixed"{# collect list of fixed sites \ ++n; X[n]=$1; Y[n]=$2; Z[n]=$3; score[n]=$(NF-1); bigscore*=$(NF-1)} \ $NF!="fixed"{ printf "%11.1f ", bigscore*$NF \ # print out all fixed sites first \ for(i=1;i<=n;++i){printf "%s %s %s ", X[i], Y[i], Z[i];}\ # each new site goes in the middle \ printf "%s %s %s | %s %s ", $1, $2, $3, SG, file; \ # sigma scores follow in the same order \ for(i=1;i<=n;++i){printf "%s ", score[i]}; \ print $NF }' |\ cat >> ${tempfile}_${SG}_biglist # update "best" group of sites yet set bestscore = `tail -1 ${outfile}_$SG |& nawk '{print $NF+0}'` sort -n ${tempfile}_${SG}_biglist | tail -1 |\ nawk 'BEGIN{FS="|"} {print $1; print $2}' |\ nawk 'NR==1{n=0; bigscore=$1; for(i=2;i<=NF;i+=3){++n; \ X[n]=$i; Y[n]=$(i+1); Z[n]=$(i+2);}} \ NR==2{print $1, "in", $2; for(i=1;i<=n;++i){\ printf "%4d %8.5f %8.5f %8.5f %8.1f*sigma\n", i, X[i], Y[i], Z[i], $(i+2);}\ printf "product: %31.1f\n", bigscore}' |\ cat >! ${outfile}_$SG set temp = `tail -1 ${outfile}_$SG | nawk '{print $NF+0}'` if("$bestscore" != "$temp") then echo "" cat ${outfile}_$SG set bestscore = "$temp" endif # don't need this anymore rm -f ${tempfile}fixxyz >& /dev/null Recurse: ############################################################# # run down the list of all peaks we found, and launch # a new instance of $0 to look for more cross peaks set site = 0 set sites = `cat ${unique}_newsites | wc -l` while (( $site < $sites )&&($RRSPS_DEPTH < $MAX_SITES)) @ site = ( $site + 1 ) # use all the "fixxyz" sites we just used cat $fixxyzfile >! ${unique}_fixxyz # and add the one site we are interested in cat ${unique}_newsites |\ nawk -v site=$site 'NR==site' |\ cat >> ${unique}_fixxyz # here we go! if($RRSPS_DEPTH == 1) then # progress meter echo "$site $sites" | nawk '$2+0>0 && $1+0>1{printf "%d%%", 100*$1/$2}' endif echo -n "." $0 $SG $mapfile $PASSALONG ${unique}_fixxyz # each child will report it's own findings in ${tempfile}_${SG}_biglist end # done with this file rm -f ${unique}_fixxyz ${unique}_newsites >& /dev/null goto Cleanup_RSPS exit Setup: set i = 0 while( $i < $#argv ) @ i = ( $i + 1 ) @ nexti = ( $i + 1 ) @ lasti = ( $i - 1 ) if($nexti > $#argv) set nexti = $#argv if($lasti < 1) set lasti = 1 set arg = "$argv[$i]" if(! -e "$arg") then # space group if("$arg" =~ [PpCcIiFfRrHh][1-6]*) then set temp = `echo $arg | nawk '{print toupper($1)}'` if($?CLIBD) then set temp = `nawk -v SG=$temp '$4 == SG {print $4}' $CLIBD/symop.lib | head -1` endif if("$temp" != "") then # add this SG to the space group list set SGs = "$SGs $temp" continue else # check for "pseudo-spacegroup" language set temp = `echo $arg | nawk '{print toupper($1)}'` set temp = `echo $temp | nawk '/P2212|P2122|P21221|P22121/'` if("$temp" != "") then # PG222 with screw along non-standard axis set SGs = "$SGs $temp" # create new spacegroup library for RSPS if(! -e "${tempfile}rsps_spacegroups") mkdir ${tempfile}rsps_spacegroups if(! -e "${tempfile}rsps_spacegroups/data") mkdir ${tempfile}rsps_spacegroups cat $CLIBD/symop.lib >! ${tempfile}rsps_spacegroups/symop.lib set SYMOP = "SYMOP ${tempfile}rsps_spacegroups/symop.lib" endif endif endif if("$arg" =~ [0-9]*) then # we have a number set temp = `echo "$arg $argv[$nexti]" | nawk 'tolower($0)~/sigma$/ || tolower($1)~/sigma$/{if($1+0>0) print $1+0}'` if("$temp" != "") then set SIGMA_CUTOFF = "$temp" @ i = ( $i + 1 ) continue endif # maximum number of sites to find (recursion depth) set temp = `echo "$arg $argv[$nexti]" | nawk 'tolower($0)~/sites$/ || tolower($1)~/sites$/{if(($1+0>1)&&(int($1+0)==$1+0)) print $1+1}'` if("$temp" != "") then set MAX_SITES = "$temp" @ i = ( $i + 1 ) continue endif endif endif # files if(-e "$arg") then if("$arg" =~ *.map) then # check to see if it's a ccp4 map set temp = `echo "" | mapdump MAPIN $arg |& nawk '/Cell dimensions/{print $4, $5, $6, $7, $8, $9}'` if("$temp" != "") then set mapfiles = "$mapfiles $arg" set CELL = "$temp" else echo "WARNING: $arg is not a CCP4 map! " endif else # see if this is a "site" file egrep "[01].[0-9][0-9][0-9]" $arg >& /dev/null if(! $status) then set temp = `nawk '$1~/^[0-1].[0-9][0-9][0-9]/ || $1~/^-[0-1].[0-9][0-9][0-9]/ ' $arg | wc -l` if($temp > 0) then # use as the "sites" file set fixxyzfile = "$arg" endif endif endif endif end set SGs = `echo "$SGs"` set mapfiles = `echo "$mapfiles"` if(($#mapfiles == 0) || ($#SGs == 0)) goto Help set PASSALONG = "$MAX_SITES sites $SIGMA_CUTOFF sigma" # extend spacegroup library if("$CLIBD" == "${tempfile}rsps_spacegroups") then # add pseudo-spacegroups cat << EOF-new_sg_lib >> ${CLIBD}/symop.lib 1017 4 4 P2122 PG222 ORTHORHOMBIC 'P 21 2 2' !(unique axis a) X,Y,Z -X,Y,-Z 1/2+X,-Y,-Z 1/2-X,-Y,Z 2017 4 4 P2212 PG222 ORTHORHOMBIC 'P 2 21 2' !(unique axis b) X,Y,Z X,1/2-Y,-Z -X,1/2+Y,-Z -X,-Y,Z 1018 4 4 P21212a PG222 ORTHORHOMBIC 'P 21 21 2 (a)' ! origin on 21 21, shift (1/4,1/4,0) X,Y,Z 1/2-X,1/2-Y,Z X+1/2,-Y,-Z -X,Y+1/2,-Z 2018 4 4 P21221 PG222 ORTHORHOMBIC 'P 21 2 21' !(unique axis b) X,Y,Z -X,Y,-Z 1/2+X,-Y,1/2-Z 1/2-X,-Y,1/2+Z 3018 4 4 P22121 PG222 ORTHORHOMBIC 'P 2 21 21' !(unique axis a) X,Y,Z X,-Y,-Z -X,1/2+Y,1/2-Z -X,1/2-Y,1/2+Z 1020 2 1 C2221a PG222 ORTHORHOMBIC 'C 2 2 21a)' ! P212121 with C centring, shift(1/4,0,0) X,Y,Z 1/2-X,-Y,1/2+Z 1/2+X,1/2-Y,-Z -X,1/2+Y,1/2-Z 1/2+X,1/2+Y,Z -X,1/2-Y,1/2+Z X,-Y,-Z 1/2-X,Y,1/2-Z 1021 2 1 C222a PG222 ORTHORHOMBIC 'C 2 2 2a' ! C21212a origin on 21 21 X,Y,Z 1/2-X,1/2-Y,Z X+1/2,-Y,-Z -X,Y+1/2,-Z 1/2+ X,1/2+Y,Z -X,-Y,Z X,1/2-Y,-Z 1/2-X,Y,-Z 1022 4 1 F222a PG222 ORTHORHOMBIC 'F 2 2 2a' ! same as 1018 with face centring ! shift (1/4,0,0) X,Y,Z 1/2-X,1/2-Y,Z X+1/2,-Y,-Z -X,Y+1/2,-Z X,Y+1/2,Z+1/2 1/2-X,-Y,Z+1/2 X+1/2,-Y+1/2,-Z+1/2 -X,Y,-Z+1/2 X+1/2,Y,Z+1/2 -X,1/2-Y,Z+1/2 X,-Y,-Z+1/2 -X+1/2,Y+1/2,-Z+1/2 X+1/2,Y+1/2,Z -X,-Y,Z X,-Y+1/2,-Z -X+1/2,Y,-Z EOF-new_sg_lib endif goto ReturnFrom_Setup Cleanup_RSPS: if(! $?RRSPS_DEPTH) set RRSPS_DEPTH = -1 if($RRSPS_DEPTH == 0) then # the "real" finish echo "" echo "done! " foreach SG ( $SGs ) echo "" echo -n "best solution for " cat ${outfile}_$SG sort -nr ${tempfile}_${SG}_biglist |\ nawk 'BEGIN{FS="|"} NF==2{print $1; print $2}' |\ nawk 'NR%2==1{n=0; bigscore=$1; for(i=2;i<=NF;i+=3){++n; \ X[n]=$i; Y[n]=$(i+1); Z[n]=$(i+2);}} \ NR%2==0{print ""; print $1, "in", $2; for(i=1;i<=n;++i){\ printf "%4d %8.5f %8.5f %8.5f %8.1f*sigma\n", i, X[i], Y[i], Z[i], $(i+2);}\ printf "product: %31.1f\n", bigscore}' |\ cat >! ${outfile}_$SG echo "full listing for $SG is in ${outfile}_$SG" rm -f ${tempfile}_${SG}_biglist >& /dev/null end rm -f ${tempfile}.empty >& /dev/null endif rm -rf ${tempfile}rsps_spacegroups/ >& /dev/null exit ######################## # Future plans - fix the non-standard orthorhombic implementation