#! /bin/csh -f
#
#   Jiffy for interactively calculating cell volumes/Matthews numbers
#
#   Input words are taken from the command line first and then, in an
#   endless loop, from the terminal prompt.  Words recognized by Setup:
#       file.mtz             cell and space group via mtzdump
#       file.mat             cell from line 8 of the file; space group from
#                            a strategy.com in the same directory
#       file.pdb             cell, space group and mass from the PDB records
#       N aa | Naa           mass as N residues of 120 each
#       N atoms              mass as N atoms of 7 each
#       N kD | NkD           mass as N*1000
#       MASS n / CHAINS n / VM x / SOL x / x%
#       CELL a b c al be ga  or a bare run of three or more numbers > 5
#       a space group symbol such as P212121
#   Requires matthews_coef and mtzdump on the path and $CLIBD/symop.lib.
#
# route nawk calls to awk so the script also runs where nawk is not installed
alias nawk awk

# starting values
set MASS = 400000
set SG = P1
set SGnum = 1
set CELL = "100 100 100 90 90 90"
set CHAINS = 1

# scratch files are created in the current directory under this prefix
set tempfile = "cell_temp"

set input = "$*"

# parse the command line before the first report
goto Setup
Return_from_Setup:

# calculate the Vm, etc
matthews_coef << end_mat >! ${tempfile}.log
CELL $CELL
SYMM $SGnum
MOLW $MASS
NMOL $CHAINS
END
end_mat
set Vm = `awk 'BEGIN{FS=":"} /Matthews Coefficient/{print $NF+0}' ${tempfile}.log`
set SOLC = `awk 'BEGIN{FS=":"} /the solvent %/{print $NF+0}' ${tempfile}.log`
# the log is only removed when a Vm was found in it
if("$Vm" != "") rm -f ${tempfile}.log >& /dev/null

set aa = `echo $MASS | awk '{print int($1/120)}'`

# negative solvent content and neither mass nor chain count was given by
# the user: rerun Setup so that the mass is rederived from a Vm of 2.4
set test = `echo $SOLC | awk '{print ($1<0)}'`
if( $test && ! $?USER_MASS && ! $?USER_CHAINS ) then
    # maybe re-define something? Mass?
    set Vm = 2.4
    set UPDATE_MASS
    goto Setup
endif

# report the current values
cat << EOF
CELL $CELL
SYMM $SG
MASS $MASS ($aa aa)
CHAINS $CHAINS
VM $Vm
SOLC ${SOLC}%
CONC $Conc
EOF

echo "change anything? "
echo -n "-> "
set input = ( $< )

# execution falls through into Setup, which jumps back to Return_from_Setup

Setup:
# scan user input for changes

# ---- MTZ file: cell and space group number come from mtzdump
set mtzfile = `echo " $input " | nawk 'BEGIN{RS=" "} /\.mtz$/{print; exit}'`
if(-e "$mtzfile") then
    set CELL = `echo "head" | mtzdump hklin $mtzfile | nawk '/Cell Dimensions/{getline;getline;print}'`
    set SGnum = `echo "head" | mtzdump hklin $mtzfile | nawk '/Space group/{print $NF+0}'`
    set SG = `nawk -v num=$SGnum '$1==num && NF>5{print $4}' ${CLIBD}/symop.lib `
    set Vm = 2.4
    set UPDATE_MASS
endif

# ---- matrix file: falls back to index/auto.mat when the input is empty
set matfile = `echo " $input " | nawk 'BEGIN{RS=" "} /\.mat$/{print; exit}'`
if("$matfile" == "" && "$input" == "") set matfile = index/auto.mat
if(-e "$matfile") then
    echo "reading $matfile"
    # line 8 is used as the cell when it holds exactly six fields
    set test = `awk 'NR==8 && NF==6' $matfile`
    if("$test" != "") then
        set CELL = "$test"
        set Vm = 2.4
        set UPDATE_MASS
    endif
    set dirname = `dirname $matfile`
    if(-e "${dirname}/strategy.com") then
        echo "reading ${dirname}/strategy.com"
        set test = `awk '/^SYMM/{print $2}' ${dirname}/strategy.com`
        if("$test" != "") then
            set SG = `awk -v SG=$test '$4 == SG || $1== SG {print $4}' $CLIBD/symop.lib | head -1`
        endif
    endif
endif

# ---- PDB file: cell, space group and a series of mass estimates
set pdbfile = `echo " $input " | nawk 'BEGIN{RS=" "} /\.pdb$/{print; exit}'`
if(-e "$pdbfile") then
    set test = `awk '/^CRYST1/{print $2,$3,$4,$5,$6,$7}' $pdbfile`
    if("$test" != "") set CELL = "$test"
    set test = `awk '/^CRYST/{print substr($0,56,12)}' $pdbfile`
    if("$test" != "") then
        set SG = `awk -v pdbSG="$test" -F "[\047]" 'pdbSG==$2{print;exit}' ${CLIBD}/symop.lib | awk '{print $4}'`
    endif
    # each later estimate that produces a value overrides the earlier one
    set test = `awk '/^ATOM/ || /^HETATM/{sum+=13} END{print sum}' $pdbfile`
    if("$test" != "") then
        set MASS = $test
    endif
    # residue count of the first SEQRES chain, read from fixed columns 14-17
    # (whitespace field 3 is the chain ID whenever one is present)
    set test = `awk '/^SEQRES/{n=substr($0,14,4)+0; if(n>0) print n*123.97; exit}' $pdbfile`
    if("$test" != "") then
        set MASS = $test
    endif
    # NOTE(review): helper lives in one specific home directory - confirm it
    # exists on the target system
    set test = `~jamesh/awk/weigh $pdbfile | awk '/total mass/{print $NF}'`
    if("$test" != "") then
        set MASS = $test
    endif
endif

# detect protein mass
# MASS and USER_MASS are only touched when a number was actually parsed, so
# a word that merely contains the keyword cannot blank the mass
if("$input" =~ *aa*) then
    set test = `echo " $input " | nawk 'BEGIN{RS=" "} $1=="aa" && last+0>10{print last*120;exit} $1~/[0-9]aa$/{print $1*120;exit} {last=$1}'`
    if("$test" != "") then
        set MASS = $test
        set USER_MASS
    endif
endif
if("$input" =~ *atoms*) then
    set test = `echo " $input " | nawk 'BEGIN{RS=" "} $1=="atoms" && last+0>100{print last*7;exit} {last=$1}'`
    if("$test" != "") then
        set MASS = $test
        set USER_MASS
    endif
endif
if("$input" =~ *kD*) then
    set test = `echo " $input " | nawk 'BEGIN{RS=" "} $1=="kD" && last+0>1{print last*1000;exit} $1~/[0-9]kD$/{print $1*1000;exit} {last=$1}'`
    if("$test" != "") then
        set MASS = $test
        set USER_MASS
    endif
endif
set aa = `echo $MASS | awk '{print int($1/120)}'`

# convert to uppercase for the rest
set input = `echo $input | nawk '{print toupper($0)}'`

if("$input" =~ *MASS*) then
    set test = `echo " $input " | nawk 'BEGIN{RS=" "} $1+0>100{print $1+0;exit}'`
    if("$test" != "") then
        set MASS = $test
        set USER_MASS
    endif
endif

# detect chain count
if("$input" =~ *CHAIN*) then
    set test = `echo " $input " | nawk 'BEGIN{RS=" "} $1+0>0&&$1<100{print $1+0;exit}'`
    if("$test" != "") then
        set CHAINS = $test
        set USER_CHAINS
    endif
endif

# detect new VM
if("$input" =~ *VM*) then
    set test = `echo " $input " | nawk 'BEGIN{RS=" "} $1+0>0&&$1+0<6{print $1+0;exit}'`
    if("$test" != "") then
        set Vm = $test
        set UPDATE_MASS
    endif
endif

# detect new solvent content
if(("$input" =~ *SOL*)||("$input" =~ *[0-9]%*)) then
    # values below 1 are taken as fractions and scaled to percent
    set test = `echo " $input " | nawk 'BEGIN{RS=" "} $1+0<1{$1*=100} $1+0>1&&$1+0<100{print $1+0;exit}'`
    if("$test" != "") then
        set SOLC = $test
        # change this to equivalent Vm
        set Vm = `echo $SOLC | nawk '{print 123.97/(100 - $1)}'`
        set UPDATE_MASS
    endif
endif

# detect unit cell
# take the first run of three numbers > 5 plus the three words after it;
# stopping at the first match keeps the result at exactly six words
set cell = `echo $input | nawk '{for(i=1;i<=NF-2;++i) if(($i+0>5)&&($(i+1)+0>5)&&($(i+2)+0>5)) {print $i+0, $(i+1)+0, $(i+2)+0, $(i+3)+0, $(i+4)+0, $(i+5)+0; exit}}'`
if("$input" =~ CELL*) then
    set cell = `echo "$input" | nawk '{print $2+0, $3+0, $4+0, $5+0, $6+0, $7+0}'`
endif
if($#cell == 6) then
    # any zero entry is filled in from the current cell
    set CELL = `echo $cell $CELL | nawk '$1+0==0{$1=$7} $2+0==0{$2=$8} $3+0==0{$3=$9} $4+0==0{$4=$10} $5+0==0{$5=$11} $6+0==0{$6=$12} {print $1, $2, $3, $4, $5, $6}'`
endif

# detect space groups
set sg = `echo " $input " | nawk 'BEGIN{RS=" "} /[PRHIFC][1-6]/{print; exit}'`
if("$sg" != "") then
    # R is rewritten as H before the library lookup
    set sg = `echo $sg | awk '{gsub("[Rr]","H"); print}'`
    # check this word against the SG library
    if( -e $CLIBD/symop.lib) then
        set sg = `nawk -v SG=$sg '$4 == toupper(SG) {print $4}' $CLIBD/symop.lib | head -1`
    endif
    if("$sg" =~ [PpCcIiFfHh][1-6]*) then
        set SG = "$sg"
    endif
endif
set SGnum = `awk -v SG=$SG '$4 == SG {print $1}' $CLIBD/symop.lib | head -1`

# get number of asymmetric units in this space group
set ASU_per_CELL = `nawk -v SGnum=$SGnum '$1 == SGnum {print $2}' $CLIBD/symop.lib |& head -1`
if("$ASU_per_CELL" == "") set ASU_per_CELL = 1

# reconcile cell with this SG
# the awk program keys on the last word of its input line, which is the
# sixth field of the symop.lib entry appended to the cell
set latt = `nawk -v SG="$SG" '$4 == toupper(SG) {print $6}' $CLIBD/symop.lib |& head -1`
echo "$CELL $latt" |\
nawk '{a=$1+0; b=$2+0; c=$3+0; A=$4+0; B=$5+0; G=$6+0}\
$NF == "MONOCLINIC" {A=90; G=90; if($4+0 > 5) B=$4+0; if($5+0 > 5) B=$5+0}\
$NF == "ORTHORHOMBIC" {A=90; B=90; G=90}\
$NF == "TETRAGONAL" || $NF == "TRIGONAL" || $NF == "HEXAGONAL" {\
   b=a; A=90; B=90; G=120;\
   if((c==0) && (($2-a)^2 > .0001)) {c = $2+0}}\
$NF == "TETRAGONAL" {G=90}\
$NF == "CUBIC" {b=a; c=a; A=90; B=90; G=90}\
END{if(a>5 && b>5 && c>5 && A>5 && B>5 &&G>5) print a, b, c, A, B, G}' |\
cat >! ${tempfile}
set temp = `cat ${tempfile}`
rm -f ${tempfile}
if("$#temp" == 6) then
    # actual, corrected unit cell was given
    set CELL = `echo "$temp"`
endif

# calculate the volume of the cell
echo $CELL |\
nawk 'NF==6{s=3.1415926535897899419/180; A=cos(s*$4); B=cos(s*$5); G=cos(s*$6); \
 skew = 1 + 2*A*B*G - A*A - B*B - G*G ; if(skew < 0) skew = -skew;\
 printf "%.3f\n", $1*$2*$3*sqrt(skew)}' |\
cat >! ${tempfile}volume
set CELLvolume = `cat ${tempfile}volume`
rm -f ${tempfile}volume >> /dev/null

# calculate the concentration of asymmetric units (in mol/L)
set Conc = `echo "$CELLvolume $ASU_per_CELL $CHAINS" | nawk '$2+0>0 {print $3/( ($1/$2) * (1e-10 * 10)^3 ) / 6.022e23}' | nawk '{printf "%.1f mM", $1*1000}'`

# reconcile protein mass with new VM or new SOLC
if($?UPDATE_MASS) then
    # compute an ASU size consistent with CELL and Vm provided
    set ASU = `echo "$CELLvolume $ASU_per_CELL $Vm" | nawk '$2+0>0 && $3+0>0{print ($1/$2) / $3}'`
    if("$ASU" == "") set ASU = "30000"

    # redefine chains if user specified a mass
    if($?USER_MASS) then
        # never report fewer than one chain, and skip a zero or missing mass
        set test = `echo $ASU $MASS | awk '$2+0>0{n=$1/$2; if(n<1) n=1; printf "%.0f", n}'`
        if("$test" != "") set CHAINS = $test
    else
        # redefining mass, so reset chains to one
        if(! $?USER_CHAINS) set CHAINS = 1
        set MASS = `echo $ASU $CHAINS | awk '{print $1/$2}'`
    endif
endif
unset UPDATE_MASS

goto Return_from_Setup