#! /bin/csh -f # # SGsearch.com 4.1 -James Holton 6-30-04 # # Space group search script for scala scripts # set rawfile = raw.mtz set script = merge.com set logfile = logs/merge set tempfile = ${CCP4_SCR}/SGsearch_temp$$ set SG = P1 # read from $rawfile set CELL set RES = "" # default to resolution in $script or $rawfile set P1_scales_SG = "" alias nawk /usr/bin/awk nawk 'BEGIN{exit}' >& /dev/null if($status) alias nawk awk # command-line arguments (1 of 2) foreach arg ( $* ) if(-e "$arg") then if("$arg" =~ *.mtz) set rawfile = "$arg" if("$arg" =~ *.com) set script = "$arg" endif if("$arg" == ignore) set IGNORE end if((! -e "$rawfile")||(! -e "$script")) then cat << EOF usage: $0 [$script] [$rawfile] $SG 2.2A where: $script will scale and merge scaling, mergeing and truncating $rawfile $rawfile is the unmerged mtz file $SG is the "root" space group i.e. P222 for P222, P2221, P21212, and P212121 2.2A is the resolution for scaling and mergeing EOF exit 1 endif ls -1 ${logfile}.*.log >& /dev/null if((! $status)&&(! $?IGNORE)) then echo "" echo "NOTICE: using stats from ${logfile}.*.log" echo "" endif if(! -e logs) mkdir logs echo -n "evaluating alternative space groups for $rawfile with $script" ############################################################################## # update cell and SG variables from the raw MTZ header # ############################################################################## echo "HEADER" | mtzdump hklin $rawfile >! $tempfile set SG = `nawk '/Space group/{print $5}' $tempfile` set SG = `nawk '/Space group/{gsub("\047","");print $5}' $tempfile` set SGnum = ` nawk '/Space group/{print $NF+0}' ${tempfile} ` set SG = ` nawk -F "[\047]" '/Space group/{print $2}' ${tempfile} ` set SG = ` nawk -v num=$SGnum '$1==num && NF>5{print $4}' ${CLIBD}/symop.lib ` set CELL = `nawk '/Cell Dimensions/{getline;getline;print}' $tempfile` set mtzRES = `nawk '/Resolution Range/{getline;getline;print $6}' $tempfile` # calculate unit cell volume echo $CELL |\ nawk 'NF==6{s=atan2(1,0)/90; A=cos(s*$4); B=cos(s*$5); G=cos(s*$6); \ skew = 1 + 2*A*B*G - A*A - B*B - G*G ; if(skew < 0) skew = -skew;\ printf "%.3f\n", $1*$2*$3*sqrt(skew)}' |\ cat >! ${tempfile}volume set CELLvolume = `cat ${tempfile}volume` rm -f ${tempfile}volume >> /dev/null # command-line arguments (2 of 2) foreach arg ( $* ) if("$arg" =~ [PpCcIiFfRrHh][1-6]*) set SG = `echo $arg | nawk '{print toupper($0)}'` if(("$arg" =~ [0-9]*)&&("$arg" !~ [a-z,A-Z]*)) then # a pure number is on the command line if("$arg" =~ *.*) then # take it as resolution set RES = `echo $arg | nawk '$1+0>0.5 && $1+0<10{print $1+0}'` else # take it as ASU size set ASU = `echo $arg | nawk '{printf "%d", $1+0}'` endif endif if("$arg" =~ [0-9]*A) then # looks like a resolution set temp = `echo $arg | nawk '$1+0>0.5 && $1+0<10{print $1+0}'` if($#temp) set RES = "$temp" endif end if("$RES" != "") then echo " to ${RES}A" else echo "" set RES = "$mtzRES" endif # extract protein-compatible space groups from CCP4 in order of increasing symmetry #nawk '$5 ~ /^PG/ && ! /m/ && ! /bar/ && $1 < 500' $CLIBD/symop.lib >! $tempfile cat ${CLIBD}/symop.lib |\ nawk '$5 ~ /^PG/ && ! /m/ && ! /bar/ && $1 < 500{\ printf "%s ", $0; for(op=$2;op>0;--op){getline;printf $1 $2};print ""}' |\ nawk '{print length($NF), $0}' |\ sort -n |\ nawk '{print $2,$3,$4,$5,$6,$7}' >! $tempfile # get crystal system of $SG set sys = `nawk -v SG=$SG '$4==SG{print $6}' $tempfile` set latt = `echo $SG | nawk '{print substr($1,1,1)}'` # extract point group representatives nawk 'substr($4,2)==substr($5,3)' $tempfile >! ${tempfile}PGs # extract point group representatives of the same lattice type #nawk -v latt=$latt 'substr($4,1,1)==latt && substr($4,2)==substr($5,3)' $tempfile >! ${tempfile}PGs nawk -v latt=$latt 'substr($4,1,1)==latt && ! seen[substr($5,3)]{print;++seen[substr($5,3)]}' $tempfile >! ${tempfile}PGs # now get all point groups that co-index with $SG if(("$sys" == "TRIGONAL") || ("$sys" == "HEXAGONAL")) then set SGs = `nawk '$6 ~ /TRIG/{print $4} $6 ~ /HEXA/{print $4}' ${tempfile}PGs` else set SGs = `nawk -v S=$sys '$6==S{print $4}' ${tempfile}PGs` endif rm -f $tempfile ${tempfile}PGs echo "SG ASU(A^3) ASU(~kD) Rmerge(%) Brightest systematic absence" SGcheck: # add any extra considerations set SGs = ( $SGs ) if("$SGs " !~ "P1 "*) set SGs = "P1 $SGs" foreach SG ( $SGs ) # get size of ASU (in A^3) set temp = `nawk -v SG=$SG '$4==SG && $1 < 500{print $2}' $CLIBD/symop.lib` set temp = `echo "$CELLvolume $temp" | nawk '$2+0!=0{print $1/$2}'` if("$temp" != "") set volume = "$temp" # indicate P1 merge after P1_scales_SG scaling set printSG = "$SG" if("$P1_scales_SG" != "" && "$SG" == "P1" && "$P1_scales_SG" != "P1") then set SG = $P1_scales_SG set printSG = "${SG}-P1" endif # print it out in A^3 and kD at Vm=2.5 echo "$printSG" | nawk '{printf "%-7s", $0}' echo "$volume" | nawk '{printf "%9d %3d ", $0, $0/2500}' # remove bum log files if(-e ${logfile}.${printSG}.log) then grep Overall: ${logfile}.${printSG}.log >& /dev/null if(($status)||($?IGNORE)) rm -f ${logfile}.${printSG}.log endif # set up reindexing procedure set REINDEX = "" set new_SG = "$SG" # check for "pseudo-spacegroups" set temp = `echo $SG | nawk '{print toupper($1)}'` if("$temp" =~ [PC]2[12][12]*) then set axes = "" if("$temp" == P2221) then # P2221 with screw along longest axis set axes = "a b c" set new_SG = "P2221" endif if("$temp" == P2212) then # P2221 with screw along mid-length axis set axes = "b c a" set new_SG = "P2221" endif if("$temp" == P2122) then # P2221 with screw along shortest axis set axes = "c a b" set new_SG = "P2221" endif if("$temp" == P21212) then # P21212 with non-screw along longest axis set axes = "a b c" set new_SG = "P21212" endif if("$temp" == P21221) then # P21212 with non-screw along mid-length axis set axes = "b c a" set new_SG = "P21212" endif if("$temp" == P22121) then # P21212 with non-screw along shotest axis set axes = "c a b" set new_SG = "P21212" endif # cannonize these if("$new_SG" == "P222") set axes = "a b c" if("$new_SG" == "P212121") set axes = "a b c" # decide on new axis ordering (for asymmetric orthorhombics) if("$axes" != "") then # get current axis ordering # find out what the cannonical one would be # then decide how to go from current ordering to the desired one echo "$CELL" | nawk '{\ # print out current axis order \ print $1, "h"; print $2, "k"; print $3, "l"}' |\ sort -n |\ nawk '\ # add cannonical axis names\ NR==1{print $0, "a"} NR==2{print $0, "b"} NR==3{print $0, "c"}' |\ nawk -v axes="$axes" 'BEGIN{split(axes, abc)} {\ # write desired axis ordering in front of cannonical one \ print abc[NR], $0}' |\ sort |\ cat >! ${tempfile}order set REINDEX = "reindex "`nawk '{printf "%s", $3} NR~/[12]/{print ","}' ${tempfile}order` # this should give us a mapping between any two orthorhombics endif endif # only run mergeing step if we havn't already done so if((! -e ${logfile}.${printSG}.log)||($?IGNORE)) then echo -n "" >! ${logfile}.${printSG}.log # apply the resolution cutoff echo "RESOLUTION $RES" |\ mtzutils hklin1 $rawfile hklout ${tempfile}.cut.mtz >>& ${logfile}.${printSG}.log # now re-index the input, raw mtz file reindex HKLIN ${tempfile}.cut.mtz \ HKLOUT ${tempfile}.reindexed.mtz << end_reindex >>& ${logfile}.${printSG}.log SYMMETRY $new_SG $REINDEX END end_reindex rm -f ${tempfile}.cut.mtz >& /dev/null # make sure it's sorted sortmtz hklout ${tempfile}.${SG}.mtz << end_sort >>& ${logfile}.${printSG}.log H K L M/ISYM BATCH I SIGI ${tempfile}.reindexed.mtz end_sort rm -f ${tempfile}.reindexed.mtz >& /dev/null # run the merge script if("$printSG" !~ *-P1) then ./$script ${tempfile}.${SG}.mtz >>& ${logfile}.${printSG}.log else # rewrite the script to scale with one SG and merge in P1 cat $script |\ nawk '{print} tolower($1)~/^cycle/{print "DUMP '${tempfile}'_scales.txt"}' |\ cat >! ${tempfile}_scale.com cat $script |\ nawk '{print} tolower($1)~/^cycle/{print "RESTORE '${tempfile}'_scales.txt";print "ONLYMERGE"}' |\ cat >! ${tempfile}_merge.com chmod a+x ${tempfile}_scale.com ${tempfile}_merge.com ${tempfile}_scale.com ${tempfile}.${SG}.mtz >>& ${logfile}.${printSG}.log # now merge in P1 echo "SYMM P1" |\ reindex hklin ${tempfile}.${SG}.mtz \ hklout ${tempfile}.${printSG}.mtz >>& ${logfile}.${printSG}.log ${tempfile}_merge.com ${tempfile}.${printSG}.mtz >>& ${logfile}.${printSG}.log # rm -f ${tempfile}_scale.com ${tempfile}_merge.com >& /dev/null # rm -f ${tempfile}.scales.txt >& /dev/null # rm -f ${tempfile}.${printSG}.mtz >& /dev/null endif # see if truncate was run grep "SUITE: TRUNCATE" ${logfile}.${printSG}.log >& /dev/null if($status) then # truncate was not run, so lets try to run it set mergedmtz = `nawk '/Filename:/{mtz=$NF} /Scala/&&/termination/{print mtz}' ${logfile}.${printSG}.log` set ASU = `echo "$volume" | nawk '{print $0/2500}'` if((-e "$mergedmtz")&&("$ASU" != "")) then echo "nresidue $ASU" |\ truncate hklin $mergedmtz hklout ${tempfile}.mtz >>& ${logfile}.${printSG}.log if($status) then # truncate failed, try it again? endif endif endif # clean up rm -f ${tempfile}.${SG}.mtz >& /dev/null endif # print out Rmerge set Rmerge = `nawk '/ Overall: /{print $2}' logs/merge.${printSG}.log | tail -1` if("$Rmerge" == "0.000") then # use "Ranom" instead? set Rmerge = `nawk '/ Overall: /{print $5}' logs/merge.${printSG}.log | tail -1` endif if("$Rmerge" == "") then echo "-" else # print out Rmerge echo $Rmerge | nawk '{printf "%5.1f ", $1*100}' # Look for Systematic absence intensities cat logs/merge.${printSG}.log |\ nawk '/Systematic absences are OMITTED/,/Distributions/' |\ nawk 'NF==5 && ! /[a-z]/ && $5 != 0{printf "%-4d%-4d%-4d I/sig = %8.3f\n", $1, $2, $3, $4/$5}' |\ sort +5n >! $tempfile set Absence = `tail -1 $tempfile` if("$Absence" != "") then # print out the worst one tail -1 $tempfile | nawk '{printf "%s", $0}' endif rm -f $tempfile echo "" endif end # now, pick the best point/space group # gather stats from last round of runs if($?saved_SGs) then set SGs = ( $saved_SGs ) unset saved_SGs endif echo "" >! $tempfile foreach SG ( $SGs ) # indicates P1 merge after P1_scales_SG scaling set printSG = "$SG" if("$P1_scales_SG" != "" && "$SG" == "P1" && "$P1_scales_SG" != "P1") then set printSG = "${P1_scales_SG}-P1" endif set Rmerge = `nawk '/ Overall: /{print $2}' logs/merge.${printSG}.log | tail -1` if("$Rmerge" == "0.000") then # use "Ranom" instead? set Rmerge = `nawk '/ Overall: /{print $5}' logs/merge.${printSG}.log | tail -1` endif if("$Rmerge" != "") then echo -n "$SG " >> $tempfile echo -n "$Rmerge " >> $tempfile cat logs/merge.${printSG}.log |\ nawk '/Systematic absences are OMITTED/,/Distributions/' |\ nawk 'NF==5 && ! /[a-z]/ && $5 != 0{print $4/$5}' |\ sort -n | tail -1 >! $tempfile.absent set Absence = `tail -1 $tempfile.absent` echo "$Absence" >> $tempfile endif end rm -f $tempfile.absent >& /dev/null # do some statistics set best_SG = `sort -n +1 $tempfile | nawk 'NF==2' | head -1 | nawk '$2+0>0.0{print $1}'` set P1_Rmerge = `nawk '$1=="P1"{print $2}' $tempfile` set min_Rmerge = `sort -n +1 $tempfile | nawk 'NF==2' | head -1 | nawk '$2+0>0.0{print $2}'` set max_Rmerge = `sort -nr +1 $tempfile | nawk 'NF==2' | head -1 | nawk '$2+0>0.0{print $2}'` set mean_Rmerge = `nawk '$2+0>0{sum += $2;++count} END{if(count) print sum/count}' $tempfile` set rmsd_Rmerge = `nawk -v avg=$mean_Rmerge '$2+0>0{sum+=($2-avg)^2;++count} END{if(count) print sqrt(sum/count)}' $tempfile` # do we already have a "best" SG for obtaining the P1 baseline? if("$P1_scales_SG" == "" && "$best_SG" != "P1" && "$best_SG" != "") then # probably always a good idea to try and use P1_scales_SGs scales, but merge with P1 set P1_scales_SG = "$best_SG" set saved_SGs = ( $SGs ) set SGs = P1 echo "scaling with $P1_scales_SG to merge in P1" goto SGcheck endif set Rmerge_cutoff = "" # see if P1 run worked if("$P1_Rmerge" != "") then # P1 run succeeded, so "no symmetry" hypothesis has been considered if("$best_SG" == "P1") then # P1 had a lower Rmerge than anything else set Rmerge_cutoff = `echo $min_Rmerge 0.1 | nawk '{print $1+1.0*$2}'` else # P1 worked, but it was not the best score set Rmerge_cutoff = `echo $min_Rmerge 0.1 | nawk '{print $1+1.0*$2}'` endif else # P1 run failed, but we gotta have it # what do we do? set Rmerge_cutoff = 0.2 endif set test = `echo $Rmerge_cutoff 0.5 | nawk 'NF==2{print ($1 > $2)}'` if("$test" == "1") set Rmerge_cutoff = 0.5 # filter for SGs below the mergeing criteria set SGs = `nawk -v max=$Rmerge_cutoff '$2+0 < max && $3+0 < 4 {print $1}' $tempfile` if("$SGs" == "") then echo "WARNING: none of these space groups merged well." if("$P1_Rmerge" == "") echo "not even P1" echo " try to tune up $script next time." exit 9 endif # if $?PG, we have been here before if($?PG) then if($#SGs == 1) then echo "Possible space group is: $SGs" else echo "Possible space groups are: $SGs" endif # echo "$SGs" >! SGs.txt rm -f $tempfile exit endif # extract protein-compatible space groups from CCP4 in order of increasing symmetry #nawk '$5 ~ /^PG/ && ! /m/ && ! /bar/ && $1 < 500' $CLIBD/symop.lib >! $tempfile cat ${CLIBD}/symop.lib |\ nawk '$5 ~ /^PG/ && ! /m/ && ! /bar/ && $1 < 500{\ printf "%s ", $0; for(op=$2;op>0;--op){getline;printf $1 $2};print ""}' |\ nawk '{print length($NF), $0}' |\ sort -n |\ nawk '{print $2,$3,$4,$5,$6,$7}' >! $tempfile # pursue all potential possibilites (but there should really only be one) set temp = "" echo "checking for screw axes in $SGs" foreach SG ( $SGs ) # get point group of $SG set PG = `nawk -v SG=$SG '$4==SG{print $5}' $tempfile` set latt = `echo $SG | nawk '{print substr($1,1,1)}'` # now get all space groups in $PG on $latt set temp = "$temp "`nawk -v PG=$PG -v latt=$latt '$5==PG && substr($4,1,1)==latt{print $4}' $tempfile` # add pseudo-SGs for asymmetric orthorhombics if("$PG" == "PG222") then set pseudoSGs = "" foreach sg ( $temp ) set pseudoSGs = "$pseudoSGs $sg" if("$sg" == "P2221") set set pseudoSGs = "$pseudoSGs P2212 P2122" if("$sg" == "P21212") set set pseudoSGs = "$pseudoSGs P21221 P22121" end set temp = "$pseudoSGs" endif end rm -f $tempfile set SGs = `echo $temp` # check each of these goto SGcheck exit