Source code for nmrstarlib.plsimulator

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
nmrstarlib.plsimulator
~~~~~~~~~~~~~~~~~~~~~~

This module provides the interface classes necessary to create a simulated peak list file.
"""

import json
import re
from itertools import product

from . import nmrstarlib


class DimensionComponent(object):
    """Common interface shared by dimension groups and concrete dimensions."""

    def __init__(self, label, position):
        """Remember the label and position of a dimension component.

        :param str label: Label of a dimension.
        :param int position: Position of the dimension within a peak according
            to sequence site position (-1, 0, or +1).
        """
        self.position = position
        self.label = label
class DimensionGroup(DimensionComponent):
    """Composite dimension: a labelled group that collects concrete dimensions."""

    def __init__(self, label, position):
        """Create an empty dimension group.

        :param str label: Label of a dimension.
        :param int position: Position of the dimension within a peak according
            to sequence site position (-1, 0, or +1).
        """
        # Members are appended by the caller after construction.
        self.dimensions = list()
        super(DimensionGroup, self).__init__(label, position)
class Dimension(DimensionComponent):
    """Concrete dimension carrying an optional assignment and chemical shift."""

    def __init__(self, label, position, assignment=None, chemshift=None):
        """Concrete dimension initializer.

        :param str label: Label of a dimension.
        :param int position: Position of the dimension within a peak according
            to sequence site position (-1, 0, or +1).
        :param str assignment: Chemical shift assignment of a dimension.
        :param float chemshift: Chemical shift value of a dimension.
        """
        self.chemshift = chemshift
        self.assignment = assignment
        super(Dimension, self).__init__(label, position)
class Peak(list):
    """Single peak of a peak list, stored as a list of its dimensions."""

    def __init__(self, labels):
        """Create an empty peak.

        :param tuple labels: Dimension labels of peak.
        """
        super(Peak, self).__init__()
        self.labels = labels

    @property
    def assignments_list(self):
        """Assignment of every dimension in the peak, in order.

        :return: List of assignments.
        :rtype: :py:class:`list`
        """
        assignments = []
        for dimension in self:
            assignments.append(dimension.assignment)
        return assignments

    @property
    def chemshifts_list(self):
        """Chemical shift value of every dimension in the peak, in order.

        :return: List of chemical shifts.
        :rtype: :py:class:`list`
        """
        chemshifts = []
        for dimension in self:
            chemshifts.append(dimension.chemshift)
        return chemshifts

    def apply_noise(self, noise_generator, split_idx, ndigits=6):
        """Add generated noise to the chemical shift of each dimension in place.

        :param noise_generator: Noise generator object; its ``generate`` method
            is called with the peak labels and ``split_idx``.
        :param int split_idx: Index specifying which peak list split parameters to use.
        :param int ndigits: Number of digits the noisy chemical shift is rounded to.
        :return: None
        :rtype: :py:obj:`None`
        """
        noise_values = noise_generator.generate(self.labels, split_idx)
        # zip() stops at the shorter sequence, so surplus noise values or
        # surplus dimensions are left untouched.
        for dimension, offset in zip(self, noise_values):
            shifted = dimension.chemshift + offset
            dimension.chemshift = round(shifted, ndigits)
class PeakList(list):
    """Peak list contains chemical shift values and assignment information for each peak."""

    def __init__(self, spectrum_name, labels, source, chain_idx):
        """Peak list initializer.

        :param str spectrum_name: Spectrum name from which peak list will be simulated.
        :param list labels: Sequence of labels as they appear in a peak.
        :param str source: :class:`~nmrstarlib.nmrstarlib.NMRStarFile` source.
        :param int chain_idx: :class:`~nmrstarlib.nmrstarlib.NMRStarFile` chain index.
        """
        super(PeakList, self).__init__()
        self.spectrum_name = spectrum_name
        self.labels = labels
        # The stored source combines the file source, spectrum name and chain index.
        self.source = "{}_{}_{}".format(source, spectrum_name, chain_idx)

    def _to_sparky(self):
        """Save :class:`~nmrstarlib.plsimulator.PeakList` into Sparky-formatted string.

        :return: Peak list representation in Sparky format.
        :rtype: :py:class:`str`
        """
        columns = "\t\t".join("w" + str(number) for number in range(1, len(self.labels) + 1))
        lines = ["Assignment\t\t{}\n\n".format(columns)]
        for peak in self:
            assignment_str = "-".join(peak.assignments_list)
            dimensions_str = "\t\t".join(str(chemshift) for chemshift in peak.chemshifts_list)
            lines.append("{}\t\t{}\n".format(assignment_str, dimensions_str))
        return "".join(lines)

    def _to_autoassign(self):
        """Save :class:`~nmrstarlib.plsimulator.PeakList` into AutoAssign-formatted string.

        :return: Peak list representation in AutoAssign format.
        :rtype: :py:class:`str`
        """
        columns = "\t\t".join(str(number) + "Dim" for number in range(1, len(self.labels) + 1))
        lines = ["#Index\t\t{}\t\tIntensity\t\tWorkbook\n".format(columns)]
        # Peak indices are 1-based; the intensity column is always written as 0.
        for peak_number, peak in enumerate(self, 1):
            dimensions_str = "\t\t".join(str(chemshift) for chemshift in peak.chemshifts_list)
            lines.append("{}\t\t{}\t\t{}\t\t{}\n".format(peak_number, dimensions_str, 0, self.spectrum_name))
        return "".join(lines)

    def _to_json(self):
        """Save :class:`~nmrstarlib.plsimulator.PeakList` into JSON string.

        :return: Peak list representation in JSON format.
        :rtype: :py:class:`str`
        """
        json_list = [{"Assignment": peak.assignments_list, "Dimensions": peak.chemshifts_list}
                     for peak in self]
        return json.dumps(json_list, sort_keys=True, indent=4)

    def write(self, filehandle, fileformat):
        """Write :class:`~nmrstarlib.plsimulator.PeakList` data into file.

        The handle is closed after a successful write.

        :param filehandle: file-like object.
        :type filehandle: :py:class:`io.TextIOWrapper`
        :param str fileformat: Format to use to write data: `sparky`, `autoassign`, or `json`.
        :return: None
        :rtype: :py:obj:`None`
        :raises TypeError: If `fileformat` is not one of the supported formats.
        :raises IOError: If `filehandle` cannot be written to.
        """
        # Delegating to writestr() keeps the format dispatch in one place, so
        # "autoassign" can no longer be serialized with the Sparky writer.
        contents = self.writestr(fileformat)
        try:
            filehandle.write(contents)
        except IOError:
            raise IOError('"filehandle" parameter must be writable.')
        filehandle.close()

    def writestr(self, fileformat):
        """Write :class:`~nmrstarlib.plsimulator.PeakList` data into string.

        :param str fileformat: Format to use to write data: `sparky`, `autoassign`, or `json`.
        :return: String representing the :class:`~nmrstarlib.plsimulator.PeakList` instance.
        :rtype: :py:class:`str`
        :raises TypeError: If `fileformat` is not one of the supported formats.
        """
        if fileformat == "sparky":
            return self._to_sparky()
        if fileformat == "autoassign":
            return self._to_autoassign()
        if fileformat == "json":
            return self._to_json()
        raise TypeError("Unknown file format.")
class SpinSystem(list):
    """Spin system: a list of related resonances associated with specific atoms in a molecule."""

    def __init__(self):
        """Create an empty spin system; members are added after construction."""
        super(SpinSystem, self).__init__()
class SequenceSite(list):
    """Sequence site: a list of residues."""

    def __init__(self, residues):
        """Build a sequence site from an iterable of residues.

        :param residues: Residues the sequence site is composed of; each is
            indexed with ``"Seq_ID"`` by :meth:`is_sequential`.
        """
        super(SequenceSite, self).__init__(residues)

    def is_sequential(self):
        """Check whether the residues of the sequence site are consecutive.

        The ``"Seq_ID"`` values, converted to integers, must form an unbroken
        ascending run from the first residue to the last one.

        :return: If sequence site is in valid sequential order (True) or not (False).
        :rtype: :py:obj:`True` or :py:obj:`False`
        """
        observed = [int(residue["Seq_ID"]) for residue in self]
        first = observed[0]
        last = observed[-1]
        expected = list(range(first, last + 1))
        return observed == expected
class PeakTemplate(list):
    """Peak template: a list of concrete dimensions."""

    def __init__(self, dimensions):
        """Build a peak template from an iterable of dimensions.

        :param dimensions: Dimensions the template consists of.
        """
        super(PeakTemplate, self).__init__(dimensions)

    @property
    def dimension_labels(self):
        """Label of every dimension in the template, in order.

        :return: List of dimension labels of a peak template.
        :rtype: :py:class:`list`
        """
        labels = []
        for dimension in self:
            labels.append(dimension.label)
        return labels

    @property
    def dimension_positions(self):
        """Position of every dimension in the template, in order.

        :return: List of dimension positions of a peak template.
        :rtype: :py:class:`list`
        """
        positions = []
        for dimension in self:
            positions.append(dimension.position)
        return positions
class PeakDescription(list):
    """Peak descriptions defined as list of general dimension groups."""

    # Raw string so that ``\w`` and ``\d`` reach the regex engine instead of
    # being treated as (invalid) string escape sequences.
    # Group 1 is the dimension label, optional group 2 the signed single-digit
    # relative position, e.g. "CA-1" -> ("CA", "-1") and "N" -> ("N", None).
    dim_pattern = re.compile(r"(\w+)([+-]?\d)?")

    def __init__(self, fraction, dimension_labels):
        """Peak description initializer.

        :param float fraction: Describes expected number of peaks.
        :param dimension_labels: List of dimension labels, each optionally
            followed by a relative position such as ``-1`` or ``+1``.
        :raises NotImplementedError: If the labels mix negative and positive
            relative positions.
        """
        self.fraction = fraction
        self.relative_positions = []

        dimensions = []
        for label in dimension_labels:
            dim_label, position = self.dim_pattern.match(label).groups()
            # A label without an explicit offset refers to position 0.
            self.relative_positions.append(int(position) if position else 0)
            dimensions.append(dim_label)

        if all(position <= 0 for position in self.relative_positions):
            # Shift so that the most negative position becomes index 0 of the
            # sequence site; an empty description needs no shift.
            shift = abs(min(self.relative_positions)) if self.relative_positions else 0
            seq_site_positions = [position + shift for position in self.relative_positions]
        elif all(position >= 0 for position in self.relative_positions):
            seq_site_positions = self.relative_positions
        else:
            # TODO: take min and max and determine window size
            raise NotImplementedError(
                "Peak descriptions mixing negative and positive relative positions are not supported.")

        dimension_groups = self.create_dimension_groups(zip(dimensions, seq_site_positions))
        super(PeakDescription, self).__init__(dimension_groups)

    @staticmethod
    def create_dimension_groups(dimension_positions):
        """Create list of dimension groups.

        :param zip dimension_positions: List of tuples describing dimension and its position within sequence site.
        :return: List of dimension groups.
        :rtype: :py:class:`list`
        """
        dimension_groups = []
        for dim_group_label, position in dimension_positions:
            dim_group = DimensionGroup(dim_group_label, position)
            # Expand the group into one concrete dimension per label listed
            # for it in nmrstarlib.RESONANCE_CLASSES.
            for dim_label in nmrstarlib.RESONANCE_CLASSES[dim_group_label]:
                dim_group.dimensions.append(Dimension(dim_label, position))
            dimension_groups.append(dim_group)
        return dimension_groups
class Spectrum(list):
    """Spectrum object described as a list of general peak descriptions."""

    def __init__(self, name, labels, min_spin_system_peaks, amino_acids_and_atoms=None):
        """Create an empty spectrum; peak descriptions are added afterwards.

        :param str name: Spectrum name.
        :param labels: Sequence of dimension labels as they appear in a peak.
        :param int min_spin_system_peaks: Minimum number of peaks per spin system.
        :param amino_acids_and_atoms: Stored unchanged on the instance;
            defaults to :py:obj:`None`.
        """
        super(Spectrum, self).__init__()
        self.amino_acids_and_atoms = amino_acids_and_atoms
        self.min_spin_system_peaks = min_spin_system_peaks
        self.labels = labels
        self.name = name

    @property
    def peak_templates(self):
        """Expand the general peak descriptions into concrete peak templates.

        Every peak description contributes one template per combination of the
        concrete dimensions of its dimension groups.

        :return: List of peak templates.
        :rtype: :py:class:`list`
        """
        return [
            PeakTemplate(combination)
            for description in self
            for combination in product(*[group.dimensions for group in description])
        ]

    @property
    def seq_site_length(self):
        """Calculate length of a single sequence site.

        The length is the number of distinct relative positions used across
        all peak descriptions of the spectrum.

        :return: Length of sequence site.
        :rtype: :py:class:`int`
        """
        distinct_positions = {
            position
            for description in self
            for position in description.relative_positions
        }
        return len(distinct_positions)