Coverage for cvpack/sheet_rmsd_content.py: 95%
20 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-09 16:14 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-09 16:14 +0000
1"""
2.. class:: SheetRMSDContent
3 :platform: Linux, MacOS, Windows
4 :synopsis: Beta-sheet RMSD content of a sequence of residues
6.. classauthor:: Charlles Abreu <craabreu@gmail.com>
8"""
10import typing as t
12import numpy as np
13from openmm import app as mmapp
14from openmm import unit as mmunit
16from .base_rmsd_content import BaseRMSDContent
17from .units import Quantity, ScalarQuantity
19# pylint: disable=protected-access
20PARABETA_POSITIONS = BaseRMSDContent._loadPositions("ideal_parallel_beta_sheet.csv")
21ANTIBETA_POSITIONS = BaseRMSDContent._loadPositions("ideal_antiparallel_beta_sheet.csv")
22# pylint: enable=protected-access
25class SheetRMSDContent(BaseRMSDContent):
26 r"""
27 The beta-sheet RMSD content of `n` residues :cite:`Pietrucci_2009`:
29 .. math::
31 \beta_{\rm rmsd}({\bf r}) = \sum_{i=1}^{n-2-d_{\rm min}}
32 \sum_{j=i+d_{\rm min}}^{n-2} S\left(
33 \frac{r_{\rm rmsd}({\bf g}_{i,j}, {\bf g}_{\rm ref})}{r_0}
34 \right)
36 where :math:`{\bf g}_{i,j}` represents the coordinates of a group of atoms
37 selected from residues i to i+2 and residues j to j+2 of the sequence,
38 :math:`d_{\rm min}` is the minimum distance between integers i and j,
39 :math:`{\bf g}_{\rm ref}` represents the coordinates of the same atoms in an
40 ideal beta-sheet configuration, :math:`r_{\rm rmsd}({\bf g}, {\bf g}_{\rm ref})`
41 is the root-mean-square distance (RMSD) between groups :math:`{\bf g}` and
42 :math:`{\bf g}_{\rm ref}`, :math:`r_0` is a threshold RMSD value, and
43 :math:`S(x)` is a smooth step function whose default form is
45 .. math::
46 S(x) = \frac{1 + x^4}{1 + x^4 + x^8}
48 The sequence of residues must be a contiguous subset of a protein chain, ordered
49 from the N-terminus to the C-terminus. The beta-sheet RMSD content is defined for
50 both antiparallel and parallel beta sheets, with different ideal configurations
51 :math:`{\bf g}_{\rm ref}` and minimim distances :math:`d_{\rm min}` in each case.
53 Every group :math:`{\bf g}_{i,j}` is formed by the N, :math:`{\rm C}_\alpha`,
54 :math:`{\rm C}_\beta`, C, and O atoms of the six residues involvend, thus
55 comprising 30 atoms in total. In the case of glycine, the missing
56 :math:`{\rm C}_\beta` atom is replaced by the corresponding H atom. The RMSD is
57 then defined as
59 .. math::
61 r_{\rm rmsd}({\bf g}, {\bf g}_{\rm ref}) =
62 \sqrt{\frac{1}{30} \sum_{k=1}^{30} \left\|
63 \hat{\bf r}_k({\bf g}) -
64 {\bf A}({\bf g})\hat{\bf r}_k({\bf g}_{\rm ref})
65 \right\|^2}
67 where :math:`\hat{\bf r}_k({\bf g})` is the position of the :math:`k`-th atom in
68 a group :math:`{\bf g}` relative to the group's center of geometry and
69 :math:`{\bf A}({\bf g})` is the rotation matrix that minimizes the RMSD between
70 :math:`{\bf g}` and :math:`{\bf g}_{\rm ref}`.
72 In addition, one can choose to partition the sequence of residues into :math:`m`
73 blocks with :math:`n_1, n_2, \ldots, n_m` residues. In this case, only groups
74 :math:`{\bf g}_{i,j}` composed of three consecutive residues from a block and
75 three consecutive residues from the next block will be considered. The definition
76 of the RMSD content is then modified to
78 .. math::
80 \beta_{\rm rmsd}({\bf r}) = \sum_{k=1}^{m-1} \sum_{i=l_{k-1}+1}^{l_{k}-2}
81 \sum_{j=l_k+1}^{l_{k+1}-2} S\left(
82 \frac{r_{\rm rmsd}({\bf g}_{i,j}, {\bf g}_{\rm ref})}{r_0}
83 \right)
85 where :math:`l_k = \sum_{i=1}^k n_i` is the index of the last residue in block
86 :math:`k`. All blocks must be contiguous subsets of the same protein chain, ordered
87 from the N-terminus to the C-terminus. However, each block can be separated from the
88 next one by any number of residues.
90 Optionally, the beta-sheet RMSD content can be normalized to the range
91 :math:`[0, 1]`. This is done by dividing its value by the number of
92 :math:`{\bf g}_{i,j}` groups. For a single, contiguous sequence of residues, this
93 number is
95 .. math::
97 N_{\rm groups} = \frac{(n - 2 - d_{\rm min})(n - 1 - d_{\rm min})}{2}
99 In the case of a partitioned sequence, the number of groups is given by
101 .. math::
103 N_{\rm groups} = \sum_{k=1}^{m-1} (n_k - 2)(n_{k+1} - 2)
105 This collective variable was introduced in Ref. :cite:`Pietrucci_2009`. The default
106 step function shown above is identical to the one in the original paper, but written
107 in a numerically safe form. The ideal beta-sheet configurations are the same used
108 for the collective variable `ANTIBETARMSD`_ and `PARABETARMSD`_ in `PLUMED v2.8.1
109 <https://github.com/plumed/plumed2>`_ .
111 .. _ANTIBETARMSD:
112 https://www.plumed.org/doc-v2.8/user-doc/html/_a_n_t_i_b_e_t_a_r_m_s_d.html
114 .. _PARABETARMSD:
115 https://www.plumed.org/doc-v2.8/user-doc/html/_p_a_r_a_b_e_t_a_r_m_s_d.html
117 .. note::
119 The present implementation is limited to :math:`1 \leq N_{\rm groups} \leq
120 1024`.
122 Parameters
123 ----------
124 residues
125 The residue sequence or residue blocks to be used in the calculation.
126 numAtoms
127 The total number of atoms in the system (required by OpenMM).
128 parallel
129 Whether to consider a parallel beta sheet instead of an antiparallel one.
130 blockSizes
131 The number of residues in each block. If ``None``, a single contiguous
132 sequence of residues is assumed.
133 thresholdRMSD
134 The threshold RMSD value for considering a group of residues as a close
135 match to an ideal beta sheet.
136 stepFunction
137 The form of the step function :math:`S(x)`.
138 normalize
139 Whether to normalize the collective variable to the range :math:`[0, 1]`.
140 name
141 The name of the collective variable.
143 Example
144 -------
145 >>> import itertools as it
146 >>> import cvpack
147 >>> import openmm
148 >>> from openmm import app, unit
149 >>> from openmmtools import testsystems
150 >>> model = testsystems.SrcImplicit()
151 >>> residues = list(it.islice(model.topology.residues(), 68, 82))
152 >>> print(*[r.name for r in residues])
153 TYR ALA VAL ... VAL THR GLU
154 >>> sheet_content = cvpack.SheetRMSDContent(
155 ... residues, model.system.getNumParticles()
156 ... )
157 >>> sheet_content.getNumResidueBlocks()
158 28
159 >>> sheet_content.addToSystem(model.system)
160 >>> platform = openmm.Platform.getPlatformByName('Reference')
161 >>> integrator = openmm.VerletIntegrator(0)
162 >>> context = openmm.Context(model.system, integrator, platform)
163 >>> context.setPositions(model.positions)
164 >>> sheet_content.getValue(context)
165 1.0465... dimensionless
166 >>> blockwise_sheet_content = cvpack.SheetRMSDContent(
167 ... residues[:5] + residues[-5:],
168 ... model.system.getNumParticles(),
169 ... blockSizes=[5, 5],
170 ... )
171 >>> blockwise_sheet_content.getNumResidueBlocks()
172 9
173 >>> blockwise_sheet_content.addToSystem(model.system)
174 >>> context.reinitialize(preserveState=True)
175 >>> blockwise_sheet_content.getValue(context)
176 0.9859... dimensionless
177 """
179 def __init__(
180 self,
181 residues: t.Sequence[mmapp.topology.Residue],
182 numAtoms: int,
183 parallel: bool = False,
184 blockSizes: t.Optional[t.Sequence[int]] = None,
185 thresholdRMSD: ScalarQuantity = Quantity(0.08 * mmunit.nanometers),
186 stepFunction: str = "(1+x^4)/(1+x^4+x^8)",
187 normalize: bool = False,
188 name: str = "sheet_rmsd_content",
189 ) -> None:
190 if blockSizes is None:
191 min_distance = 6 if parallel else 5
192 residue_groups = [
193 [i, i + 1, i + 2, j, j + 1, j + 2]
194 for i in range(len(residues) - 2 - min_distance)
195 for j in range(i + min_distance, len(residues) - 2)
196 ]
197 elif sum(blockSizes) == len(residues):
198 bounds = np.insert(np.cumsum(blockSizes), 0, 0)
199 residue_groups = [
200 [i, i + 1, i + 2, j, j + 1, j + 2]
201 for k in range(len(blockSizes) - 1)
202 for i in range(bounds[k], bounds[k + 1] - 2)
203 for j in range(bounds[k + 1], bounds[k + 2] - 2)
204 ]
205 else:
206 raise ValueError(
207 f"The sum of block sizes ({sum(blockSizes)}) and the "
208 f"number of residues ({len(residues)}) must be equal."
209 )
211 super().__init__(
212 residue_groups,
213 PARABETA_POSITIONS if parallel else ANTIBETA_POSITIONS,
214 residues,
215 numAtoms,
216 thresholdRMSD,
217 stepFunction,
218 normalize,
219 )
220 self._registerCV(
221 name,
222 mmunit.dimensionless,
223 residues=residues,
224 numAtoms=numAtoms,
225 parallel=parallel,
226 blockSizes=blockSizes,
227 thresholdRMSD=thresholdRMSD,
228 stepFunction=stepFunction,
229 normalize=normalize,
230 )
233SheetRMSDContent.registerTag("!cvpack.SheetRMSDContent")