Coverage for cvpack/sheet_rmsd_content.py: 95%

20 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-09 16:14 +0000

1""" 

2.. class:: SheetRMSDContent 

3 :platform: Linux, MacOS, Windows 

4 :synopsis: Beta-sheet RMSD content of a sequence of residues 

5 

6.. classauthor:: Charlles Abreu <craabreu@gmail.com> 

7 

8""" 

9 

10import typing as t 

11 

12import numpy as np 

13from openmm import app as mmapp 

14from openmm import unit as mmunit 

15 

16from .base_rmsd_content import BaseRMSDContent 

17from .units import Quantity, ScalarQuantity 

18 

19# pylint: disable=protected-access 

20PARABETA_POSITIONS = BaseRMSDContent._loadPositions("ideal_parallel_beta_sheet.csv") 

21ANTIBETA_POSITIONS = BaseRMSDContent._loadPositions("ideal_antiparallel_beta_sheet.csv") 

22# pylint: enable=protected-access 

23 

24 

25class SheetRMSDContent(BaseRMSDContent): 

26 r""" 

27 The beta-sheet RMSD content of `n` residues :cite:`Pietrucci_2009`: 

28 

29 .. math:: 

30 

31 \beta_{\rm rmsd}({\bf r}) = \sum_{i=1}^{n-2-d_{\rm min}} 

32 \sum_{j=i+d_{\rm min}}^{n-2} S\left( 

33 \frac{r_{\rm rmsd}({\bf g}_{i,j}, {\bf g}_{\rm ref})}{r_0} 

34 \right) 

35 

36 where :math:`{\bf g}_{i,j}` represents the coordinates of a group of atoms 

37 selected from residues i to i+2 and residues j to j+2 of the sequence, 

38 :math:`d_{\rm min}` is the minimum distance between integers i and j, 

39 :math:`{\bf g}_{\rm ref}` represents the coordinates of the same atoms in an 

40 ideal beta-sheet configuration, :math:`r_{\rm rmsd}({\bf g}, {\bf g}_{\rm ref})` 

41 is the root-mean-square distance (RMSD) between groups :math:`{\bf g}` and 

42 :math:`{\bf g}_{\rm ref}`, :math:`r_0` is a threshold RMSD value, and 

43 :math:`S(x)` is a smooth step function whose default form is 

44 

45 .. math:: 

46 S(x) = \frac{1 + x^4}{1 + x^4 + x^8} 

47 

48 The sequence of residues must be a contiguous subset of a protein chain, ordered 

49 from the N-terminus to the C-terminus. The beta-sheet RMSD content is defined for 

50 both antiparallel and parallel beta sheets, with different ideal configurations 

51 :math:`{\bf g}_{\rm ref}` and minimim distances :math:`d_{\rm min}` in each case. 

52 

53 Every group :math:`{\bf g}_{i,j}` is formed by the N, :math:`{\rm C}_\alpha`, 

54 :math:`{\rm C}_\beta`, C, and O atoms of the six residues involvend, thus 

55 comprising 30 atoms in total. In the case of glycine, the missing 

56 :math:`{\rm C}_\beta` atom is replaced by the corresponding H atom. The RMSD is 

57 then defined as 

58 

59 .. math:: 

60 

61 r_{\rm rmsd}({\bf g}, {\bf g}_{\rm ref}) = 

62 \sqrt{\frac{1}{30} \sum_{k=1}^{30} \left\| 

63 \hat{\bf r}_k({\bf g}) - 

64 {\bf A}({\bf g})\hat{\bf r}_k({\bf g}_{\rm ref}) 

65 \right\|^2} 

66 

67 where :math:`\hat{\bf r}_k({\bf g})` is the position of the :math:`k`-th atom in 

68 a group :math:`{\bf g}` relative to the group's center of geometry and 

69 :math:`{\bf A}({\bf g})` is the rotation matrix that minimizes the RMSD between 

70 :math:`{\bf g}` and :math:`{\bf g}_{\rm ref}`. 

71 

72 In addition, one can choose to partition the sequence of residues into :math:`m` 

73 blocks with :math:`n_1, n_2, \ldots, n_m` residues. In this case, only groups 

74 :math:`{\bf g}_{i,j}` composed of three consecutive residues from a block and 

75 three consecutive residues from the next block will be considered. The definition 

76 of the RMSD content is then modified to 

77 

78 .. math:: 

79 

80 \beta_{\rm rmsd}({\bf r}) = \sum_{k=1}^{m-1} \sum_{i=l_{k-1}+1}^{l_{k}-2} 

81 \sum_{j=l_k+1}^{l_{k+1}-2} S\left( 

82 \frac{r_{\rm rmsd}({\bf g}_{i,j}, {\bf g}_{\rm ref})}{r_0} 

83 \right) 

84 

85 where :math:`l_k = \sum_{i=1}^k n_i` is the index of the last residue in block 

86 :math:`k`. All blocks must be contiguous subsets of the same protein chain, ordered 

87 from the N-terminus to the C-terminus. However, each block can be separated from the 

88 next one by any number of residues. 

89 

90 Optionally, the beta-sheet RMSD content can be normalized to the range 

91 :math:`[0, 1]`. This is done by dividing its value by the number of 

92 :math:`{\bf g}_{i,j}` groups. For a single, contiguous sequence of residues, this 

93 number is 

94 

95 .. math:: 

96 

97 N_{\rm groups} = \frac{(n - 2 - d_{\rm min})(n - 1 - d_{\rm min})}{2} 

98 

99 In the case of a partitioned sequence, the number of groups is given by 

100 

101 .. math:: 

102 

103 N_{\rm groups} = \sum_{k=1}^{m-1} (n_k - 2)(n_{k+1} - 2) 

104 

105 This collective variable was introduced in Ref. :cite:`Pietrucci_2009`. The default 

106 step function shown above is identical to the one in the original paper, but written 

107 in a numerically safe form. The ideal beta-sheet configurations are the same used 

108 for the collective variable `ANTIBETARMSD`_ and `PARABETARMSD`_ in `PLUMED v2.8.1 

109 <https://github.com/plumed/plumed2>`_ . 

110 

111 .. _ANTIBETARMSD: 

112 https://www.plumed.org/doc-v2.8/user-doc/html/_a_n_t_i_b_e_t_a_r_m_s_d.html 

113 

114 .. _PARABETARMSD: 

115 https://www.plumed.org/doc-v2.8/user-doc/html/_p_a_r_a_b_e_t_a_r_m_s_d.html 

116 

117 .. note:: 

118 

119 The present implementation is limited to :math:`1 \leq N_{\rm groups} \leq 

120 1024`. 

121 

122 Parameters 

123 ---------- 

124 residues 

125 The residue sequence or residue blocks to be used in the calculation. 

126 numAtoms 

127 The total number of atoms in the system (required by OpenMM). 

128 parallel 

129 Whether to consider a parallel beta sheet instead of an antiparallel one. 

130 blockSizes 

131 The number of residues in each block. If ``None``, a single contiguous 

132 sequence of residues is assumed. 

133 thresholdRMSD 

134 The threshold RMSD value for considering a group of residues as a close 

135 match to an ideal beta sheet. 

136 stepFunction 

137 The form of the step function :math:`S(x)`. 

138 normalize 

139 Whether to normalize the collective variable to the range :math:`[0, 1]`. 

140 name 

141 The name of the collective variable. 

142 

143 Example 

144 ------- 

145 >>> import itertools as it 

146 >>> import cvpack 

147 >>> import openmm 

148 >>> from openmm import app, unit 

149 >>> from openmmtools import testsystems 

150 >>> model = testsystems.SrcImplicit() 

151 >>> residues = list(it.islice(model.topology.residues(), 68, 82)) 

152 >>> print(*[r.name for r in residues]) 

153 TYR ALA VAL ... VAL THR GLU 

154 >>> sheet_content = cvpack.SheetRMSDContent( 

155 ... residues, model.system.getNumParticles() 

156 ... ) 

157 >>> sheet_content.getNumResidueBlocks() 

158 28 

159 >>> sheet_content.addToSystem(model.system) 

160 >>> platform = openmm.Platform.getPlatformByName('Reference') 

161 >>> integrator = openmm.VerletIntegrator(0) 

162 >>> context = openmm.Context(model.system, integrator, platform) 

163 >>> context.setPositions(model.positions) 

164 >>> sheet_content.getValue(context) 

165 1.0465... dimensionless 

166 >>> blockwise_sheet_content = cvpack.SheetRMSDContent( 

167 ... residues[:5] + residues[-5:], 

168 ... model.system.getNumParticles(), 

169 ... blockSizes=[5, 5], 

170 ... ) 

171 >>> blockwise_sheet_content.getNumResidueBlocks() 

172 9 

173 >>> blockwise_sheet_content.addToSystem(model.system) 

174 >>> context.reinitialize(preserveState=True) 

175 >>> blockwise_sheet_content.getValue(context) 

176 0.9859... dimensionless 

177 """ 

178 

179 def __init__( 

180 self, 

181 residues: t.Sequence[mmapp.topology.Residue], 

182 numAtoms: int, 

183 parallel: bool = False, 

184 blockSizes: t.Optional[t.Sequence[int]] = None, 

185 thresholdRMSD: ScalarQuantity = Quantity(0.08 * mmunit.nanometers), 

186 stepFunction: str = "(1+x^4)/(1+x^4+x^8)", 

187 normalize: bool = False, 

188 name: str = "sheet_rmsd_content", 

189 ) -> None: 

190 if blockSizes is None: 

191 min_distance = 6 if parallel else 5 

192 residue_groups = [ 

193 [i, i + 1, i + 2, j, j + 1, j + 2] 

194 for i in range(len(residues) - 2 - min_distance) 

195 for j in range(i + min_distance, len(residues) - 2) 

196 ] 

197 elif sum(blockSizes) == len(residues): 

198 bounds = np.insert(np.cumsum(blockSizes), 0, 0) 

199 residue_groups = [ 

200 [i, i + 1, i + 2, j, j + 1, j + 2] 

201 for k in range(len(blockSizes) - 1) 

202 for i in range(bounds[k], bounds[k + 1] - 2) 

203 for j in range(bounds[k + 1], bounds[k + 2] - 2) 

204 ] 

205 else: 

206 raise ValueError( 

207 f"The sum of block sizes ({sum(blockSizes)}) and the " 

208 f"number of residues ({len(residues)}) must be equal." 

209 ) 

210 

211 super().__init__( 

212 residue_groups, 

213 PARABETA_POSITIONS if parallel else ANTIBETA_POSITIONS, 

214 residues, 

215 numAtoms, 

216 thresholdRMSD, 

217 stepFunction, 

218 normalize, 

219 ) 

220 self._registerCV( 

221 name, 

222 mmunit.dimensionless, 

223 residues=residues, 

224 numAtoms=numAtoms, 

225 parallel=parallel, 

226 blockSizes=blockSizes, 

227 thresholdRMSD=thresholdRMSD, 

228 stepFunction=stepFunction, 

229 normalize=normalize, 

230 ) 

231 

232 

233SheetRMSDContent.registerTag("!cvpack.SheetRMSDContent")