# Source code for libgunshotmatch.utils

#!/usr/bin/env python3
#
#  utils.py
"""
Utility functions.
"""
#
#  Copyright © 2020-2023 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
from decimal import Decimal
from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Sequence, Tuple, Type, TypeVar, Union

# 3rd party
import numpy
from attr import AttrsInstance
from chemistry_tools.spectrum_similarity import SpectrumSimilarity
from mathematical.utils import rounders
from pyms.DPA.Alignment import Alignment
from pyms.Peak.Class import Peak
from pyms.Spectrum import MassSpectrum
from scipy.stats import truncnorm  # type: ignore[import-untyped]

if TYPE_CHECKING:
	# this package
	from libgunshotmatch.project import Project

# Names exported by ``from libgunshotmatch.utils import *``.
# The underscore-prefixed helpers defined in this module are not listed.
__all__ = ("round_rt", "get_truncated_normal", "ms_comparison", "get_rt_range", "create_alignment")


def round_rt(rt: Union[str, float, Decimal]) -> Decimal:
	"""
	Limit the precision of a retention time to 10 decimal places.

	:param rt: The retention time, given as a string, a float or a :class:`~decimal.Decimal`.

	:returns: The result of :func:`mathematical.utils.rounders` for a format with 10 decimal places.
	"""

	# Pandas writes JSON data with 10 decimal places,
	# so there is no need to carry greater precision than that.
	ten_decimal_places = "0.0000000000"

	return rounders(rt, ten_decimal_places)
def get_truncated_normal(
		mean: float,
		sd: float,
		low: float = 0,
		upp: float = 10,
		count: int = 10,
		random_state: Optional[int] = None,
		) -> Sequence[float]:
	"""
	Draws ``count`` random values from a truncated normal distribution.

	:param mean: The midpoint of the normal distribution.
	:param sd: The spread of the normal distribution (the standard deviation).
	:param low: The lower bound.
	:param upp: The upper bound.
	:param count: The number of values to draw.
	:param random_state: Optional seed for the random number generator.
	"""

	# From https://stackoverflow.com/a/74448424
	# By toco_tico https://stackoverflow.com/users/1060349/toto-tico
	# CC BY-SA 4.0

	# The bounds are passed to truncnorm as offsets from the mean, scaled by the standard deviation.
	lower_clip = (low - mean) / sd
	upper_clip = (upp - mean) / sd

	distribution = truncnorm(lower_clip, upper_clip, loc=mean, scale=sd)

	return distribution.rvs(size=count, random_state=random_state)
def ms_comparison(
		top_ms: Optional[MassSpectrum],
		bottom_ms: Optional[MassSpectrum],
		) -> Optional[float]:
	"""
	Performs a mass spectrum similarity calculation between two mass spectra.

	:param top_ms: The first mass spectrum, or :py:obj:`None`.
	:param bottom_ms: The second mass spectrum, or :py:obj:`None`.

	:returns: :py:obj:`None` if either of ``top_ms`` or ``bottom_ms`` is :py:obj:`None`;
		otherwise the first value returned by the comparison's ``score()``, multiplied by 1000.
	"""

	# Nothing to compare against; report "no score" rather than raising.
	if top_ms is None or bottom_ms is None:
		return None

	# Pair each entry of ``mass_list`` with the matching entry of ``mass_spec`` as two columns.
	top_spec = numpy.column_stack((top_ms.mass_list, top_ms.mass_spec))
	bottom_spec = numpy.column_stack((bottom_ms.mass_list, bottom_ms.mass_spec))

	sim = SpectrumSimilarity(
			top_spec,
			bottom_spec,
			b=1,
			xlim=(45, 500),  # TODO: configurable or taken from spectra
			)

	# ``score()`` returns a pair; only the first value is reported.
	match, _ = sim.score()

	return match * 1000
# Type variable for attrs classes, so the helper below returns the same class type it was given.
_AI = TypeVar("_AI", bound=AttrsInstance)


def _fix_init_annotations(method: Type[_AI]) -> Type[_AI]:
	"""
	Fill in the ``__init__`` annotations of a class from its class-level annotations.

	For every name in the class's ``__annotations__``:

	* if ``__init__`` already annotates that name as :py:data:`typing.Any`,
	  the annotation is replaced with the class-level one;
	* if ``__init__`` has no annotation for that name, the class-level one is added;
	* any other existing ``__init__`` annotation is left alone.

	The ``__init__.__annotations__`` mapping is modified in place.

	:param method: The class to update (despite the parameter name, this is a class, not a method).

	:returns: The same class object that was passed in.
	"""

	# NOTE(review): this block was reconstructed from a whitespace-collapsed copy of the file.
	# The ``else`` below is assumed to pair with ``if k in init_annotations`` (pairing it with the
	# inner ``if`` would make both branches identical) -- confirm against the upstream source.

	init_annotations = method.__init__.__annotations__
	cls_annotations = method.__annotations__

	for k, v in cls_annotations.items():
		if k in init_annotations:
			# Only override placeholders; a specific annotation on ``__init__`` wins.
			if init_annotations[k] is Any:
				init_annotations[k] = v
		else:
			init_annotations[k] = v

	return method


def _to_list(l: Iterable[str]) -> List[str]:  # noqa: PRM002
	"""
	Attrs type hint helper for converting to a list.

	Otherwise the errors are:

		libgunshotmatch/consolidate/__init__.py:701: error: Argument "name_filter" to "ConsolidatedPeakFilter" has incompatible type "List[str]"; expected "Iterable[_T]" [arg-type]
		libgunshotmatch/project.py:202: error: List item 0 has incompatible type "str"; expected "_T" [list-item]
	"""

	return list(l)
def get_rt_range(project: "Project") -> Tuple[float, float]:
	"""
	Returns the minimum and maximum retention times (in minutes) across the repeats.

	The first and last entries of each repeat's intensity matrix time list are collected;
	the smallest first entry and the largest last entry are each divided by 60 and returned.

	:param project:

	:rtype:

	.. versionadded:: 0.7.0
	"""

	first_times, last_times = [], []

	# Gather the extremes of the time axis from every repeat's intensity matrix.
	for repeat in project.datafile_data.values():
		intensity_matrix = repeat.datafile.intensity_matrix
		assert intensity_matrix is not None

		time_list = intensity_matrix.time_list
		first_times.append(time_list[0])
		last_times.append(time_list[-1])

	return min(first_times) / 60, max(last_times) / 60
def create_alignment(
		peakpos: Sequence[Sequence[Optional[Peak]]],
		expr_code: List[str],
		similarity: float = 0,
		) -> Alignment:
	"""
	Builds a new :class:`pyms.DPA.Alignment.Alignment` object from already-aligned peaks.

	:param peakpos: Nested list of aligned peaks.
		Top level list contains lists of peaks for each experiment in ``expr_code``.
	:param expr_code: Experiment names. Order must match ``peakpos``.
	:param similarity: Value stored as the ``similarity`` attribute of the new alignment.

	:rtype:

	.. versionadded:: 0.8.0
	"""

	# Start from an empty alignment and populate its attributes directly.
	new_alignment = Alignment(None)

	# Each inner sequence is copied into a fresh list.
	new_alignment.peakpos = [list(experiment_peaks) for experiment_peaks in peakpos]  # type: ignore[arg-type]
	# ``peakalgt`` holds the same peaks with the two axes swapped.
	new_alignment.peakalgt = numpy.transpose(new_alignment.peakpos).tolist()  # type: ignore[arg-type]

	new_alignment.expr_code = expr_code
	new_alignment.similarity = similarity

	return new_alignment