# Source code for libgunshotmatch.search

#!/usr/bin/env python3
#
#  search.py
"""
Library search functions.
"""
#
#  Copyright © 2020-2023 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
from typing import Iterable, List

# 3rd party
import pandas  # type: ignore[import-untyped]
import pyms_nist_search
from pyms.Peak.Class import Peak

# this package
from libgunshotmatch.peak import QualifiedPeak
from libgunshotmatch.utils import round_rt

__all__ = ("identify_peaks", )


def identify_peaks(
		engine: pyms_nist_search.Engine,
		peaks_to_identify: Iterable[float],
		peak_list: List[Peak],
		n_hits: int = 10,
		verbose: bool = False,
		) -> List[QualifiedPeak]:
	"""
	Identify the peaks in ``peak_list`` where their retention times are in ``peaks_to_identify``.

	Both sets of retention times are passed through ``round_rt`` before being compared,
	so a peak matches when its rounded retention time equals a rounded target time.

	:param engine: The search engine. Its ``full_spectrum_search`` method is called
		once for every matching peak.
	:param peaks_to_identify: List of retention times of peaks to identify.
		NaN entries are ignored.
	:param peak_list: The candidate peaks. Each peak's ``rt`` is divided by 60 before comparison
		(presumably seconds to minutes, going by the verbose message -- confirm against callers).
	:param n_hits: The number of hits to return for each peak.
	:param verbose: Print a progress message for each peak being identified.

	:returns: One :class:`QualifiedPeak` per matching peak, in ``peak_list`` order,
		with the search hits appended to its ``hits``
		and ``peak_number`` set to the index label of the first matching target time.
	"""

	# TODO: Shared engine between multiple calls to identify_peaks
	# (perhaps wrap this function in a class)

	# Round the target retention times; the Series index is kept for the peak_number lookup.
	target_times = pandas.Series(peaks_to_identify).apply(round_rt)

	# Drop NaN values and collect the rest in a set, built once,
	# so the membership test inside the loop is not a linear scan per peak.
	wanted_rts = {rt for rt in target_times if not rt.is_nan()}

	peaks: List[QualifiedPeak] = []

	# Keep only the peaks whose rounded retention time is one of the targets
	for peak in peak_list:

		rounded_rt = round_rt(peak.rt / 60)

		if rounded_rt not in wanted_rts:
			continue

		qualified_peak = QualifiedPeak.from_peak(peak)

		# Index label of the first target time equal to this peak's retention time
		qualified_peak.peak_number = target_times[target_times == rounded_rt].index[0]

		ms = qualified_peak.mass_spectrum

		if verbose:
			print(f"Identifying peak at rt {rounded_rt} minutes...")

		hit_list = engine.full_spectrum_search(ms, n_hits)

		# Add search results to peak
		for hit in hit_list:
			qualified_peak.hits.append(hit)

		peaks.append(qualified_peak)

	return peaks