#! /usr/bin/awk -f
#
#   Calculate RMSD of atoms with the same name in two PDB files		James Holton 9-5-11
#
#	The PDB feild:
#           |<--- here -->|
#ATOM      1  N   ALA A 327      40.574  34.523  43.012  1.00 34.04
#
#	is used to determine if two atoms are a "pair"
#
BEGIN {
if(! atom) atom = "CA"
maxXYZ = maxdB = maxdO = 0
max_atom_XYZ = max_atom_dB = max_atom_dO = 0
maxXYZ_ID = maxdB_ID = maxdO_ID = "-"
max_atom_XYZ_ID = max_atom_dB_ID = max_atom_dO_ID = "-"
}

/^ATOM|^HETATM/{
    # read in values (watching for duplications)
    ID = substr($0,12,15)
    ++count[ID]

    if(count[ID] == 1)
    {
	# not initialized yet
	X[ID] = substr($0, 31, 8)+0
	Y[ID] = substr($0, 39, 8)+0
	Z[ID] = substr($0, 47, 8)+0
	O[ID] = substr($0, 55, 6)+0
	B[ID] = substr($0, 61, 6)+0
    }
    
    if(count[ID] == 2)
    {
	++pairs
    
	# seen this before, subtract values
	dX     = X[ID] - substr($0, 31, 8)
	dY     = Y[ID] - substr($0, 39, 8)
	dZ     = Z[ID] - substr($0, 47, 8)
	dO[ID] = O[ID] - substr($0, 55, 6)
	dB[ID] = B[ID] - substr($0, 61, 6)
	
	# get drift (and add up squares of drifts)
	sqrD   = dX*dX + dY*dY + dZ*dZ
	dXYZ[ID] = sqrt(sqrD)

	# remember maximum shifts
	if(dXYZ[ID] > maxXYZ) {maxXYZ  = dXYZ[ID]; maxXYZ_ID = ID }
	if(dO[ID]*dO[ID] > maxdO*maxdO) {maxdO = dO[ID]; maxdO_ID = ID }
	if(dB[ID]*dB[ID] > maxdB*maxdB) {maxdB = dB[ID]; maxdB_ID = ID }

	# maintain mean-square sums	
	sumXYZ += sqrD
	sumO   += dO[ID]*dO[ID]
	sumB   += dB[ID]*dB[ID]

	# separate stats for special atom type
	split(ID,word)
	if(word[1] == atom)
	{
	    ++atom_pairs

	    # maintain separate mean-square sums
	    sum_atom_XYZ += sqrD
	    sum_atom_O   += dO[ID]*dO[ID]
	    sum_atom_B   += dB[ID]*dB[ID]
	    
	    # remember maximum drifts too
	    if(dXYZ[ID] > max_atom_XYZ) {max_atom_XYZ  = dXYZ[ID]; max_atom_XYZ_ID = ID }
	    if(dO[ID]*dO[ID] > max_atom_dO*max_atom_dO) {max_atom_dO = dO[ID]; max_atom_dO_ID = ID }	    
	    if(dB[ID]*dB[ID] > max_atom_dB*max_atom_dB) {max_atom_dB = dB[ID]; max_atom_dB_ID = ID }	    
	}
	# debug output
	if(debug)
	{
	    printf("%s moved %8.4f (XYZ) %6.2f (occ) %6.2f (B) at %s\n", ID, dXYZ[ID],dO[ID], dB[ID], "cen_x " substr($0,31,24))
	}	
    }

    if(count[ID] > 2)
    {
	print "WARNING: " ID " appeared more than twice! "
    }
}


END{
    
    if(pairs+0 == 0) 
    {
	print "no atom pairs found"
	exit
    }
    rmsXYZ = sqrt(sumXYZ/pairs)
    rmsO = sqrt(sumO/pairs)
    rmsB = sqrt(sumB/pairs)
    if(atom_pairs+0 != 0)
    {
	rms_atom_XYZ = sqrt(sum_atom_XYZ/atom_pairs)
	rms_atom_O = sqrt(sum_atom_O/atom_pairs)
	rms_atom_B = sqrt(sum_atom_B/atom_pairs)
    }
    

    if(! xlog) 
    {
	print pairs " atom pairs found"
	print "RMSD("atom" )= " rms_atom_XYZ " ("atom_pairs, atom " pairs)"
	print "RMSD(all)= " rmsXYZ " ("pairs" atom pairs)"
	print "RMSD(Bfac)= " rmsB
	if(maxdO_ID != "-") print "RMSD(occ)= " rmsO

	print "MAXD(all)= " maxXYZ "\tfor " maxXYZ_ID
	print "MAXD(Bfac)= " maxdB "\tfor " maxdB_ID
	if(maxdO_ID != "-") print "MAXD(occ)= " maxdO "\tfor " maxdO_ID
	
	# final check for orphan atoms 
	for(id in count)
	{
	    if(count[id]<2) print "WARNING: " id " only found once"
	}
    }
    else
    {
#	printf "%10.8f %10.8f %10.5f %8.4f %10.4f %8.3f %8.3f    %s %s %s\n", rms_atom_XYZ, rmsXYZ, rmsO, rmsB, maxXYZ, maxO,maxdB, maxXYZ_ID,maxdB_ID,maxdO_ID
	printf "%10.8f %10.8f %10.5f %8.4f %10.4f %8.3f %8.3f\n", rms_atom_XYZ, rmsXYZ, rmsO, rmsB, maxXYZ, maxdO,maxdB
    }
}