Source code for libgunshotmatch.method

#!/usr/bin/env python3
#
#  __init__.py
"""
Methods for GunShotMatch analysis.
"""
#
#  Copyright © 2020-2023 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
from typing import Any, Dict, List, Optional, Set, Tuple, Union

# 3rd party
import attr
import tomli_w
from dom_toml.config import Config, subtable_field
from dom_toml.config.fields import Boolean, Integer, Number, String

# this package
from libgunshotmatch.method._fields import (
		convert_crop_mass_range,
		convert_rt_range,
		convert_sg_window,
		default_base_peak_filter
		)
from libgunshotmatch.utils import _fix_init_annotations, _to_list

__all__ = (
		"Method",
		"IntensityMatrixMethod",
		"PeakDetectionMethod",
		"PeakFilterMethod",
		"AlignmentMethod",
		"ConsolidateMethod",
		"SavitzkyGolayMethod",
		)


@_fix_init_annotations
@attr.define
class SavitzkyGolayMethod(Config):
	"""
	Parameters controlling the Savitzky-Golay smoothing filter.

	.. versionadded:: 0.3.0
	"""

	enable: bool = Boolean.field(default=True)
	"""
	Whether to perform Savitzky-Golay smoothing.
	"""

	#: The window size for the Savitzky-Golay filter.
	#:
	#: Either a number of scans, or a string of the form ``'<NUMBER>s'`` or ``'<NUMBER>m'``
	#: specifying a time in seconds or minutes, respectively.
	window: Union[str, int] = attr.field(converter=convert_sg_window, default=7)

	degree: int = Integer.field(default=2)
	"""
	The degree of the fitting polynomial for the Savitzky-Golay filter.
	"""
def _convert_sg_method(method: Union[bool, "SavitzkyGolayMethod", Dict[str, Any]]) -> "SavitzkyGolayMethod":
	"""
	Coerce a Savitzky-Golay setting into a :class:`SavitzkyGolayMethod`.

	:param method: One of:

		* an existing :class:`SavitzkyGolayMethod`, which is returned unchanged;
		* a boolean, used as the ``enable`` flag with all other parameters left at their defaults;
		* a mapping of keyword arguments for :class:`SavitzkyGolayMethod`.

	:returns: The corresponding :class:`SavitzkyGolayMethod`.
	"""

	if isinstance(method, SavitzkyGolayMethod):
		return method

	if isinstance(method, bool):
		# Honour the flag: ``False`` must give a disabled filter,
		# not the all-defaults instance (which has ``enable=True``).
		return SavitzkyGolayMethod(enable=method)

	return SavitzkyGolayMethod(**method)
@_fix_init_annotations
@attr.define
class IntensityMatrixMethod(Config):
	"""
	Method used for constructing an intensity matrix from a datafile.
	"""

	#: The range of masses to which the GC-MS data should be limited.
	crop_mass_range: Optional[Tuple[int, int]] = attr.field(default=(50, 500), converter=convert_crop_mass_range)

	#: Settings for Savitzky-Golay smoothing.
	savitzky_golay: SavitzkyGolayMethod = attr.field(
			# Use a factory so each instance gets its own settings object.
			# A single pre-built default would be shared (and mutable) across every instance.
			default=attr.Factory(SavitzkyGolayMethod),
			converter=_convert_sg_method,
			)

	#: Whether to perform Tophat baseline correction.
	tophat: bool = Boolean.field(default=True)

	#: The structure size for Tophat baseline correction.
	tophat_structure_size: str = String.field(default="1.5m")
@_fix_init_annotations
@attr.define
class PeakDetectionMethod(Config):
	"""
	Parameters for Biller-Biemann peak detection.
	"""

	points: int = Integer.field(default=10)
	"""
	Number of scans over which to consider a maximum to be a peak.
	"""

	scans: int = Integer.field(default=1)
	"""
	Number of scans to combine into a single peak, to compensate for spectra skewing.
	"""
@_fix_init_annotations
@attr.define
class PeakFilterMethod(Config):
	"""
	Parameters for filtering the peak list.
	"""

	noise_filter: bool = Boolean.field(default=True)
	"""
	Whether to perform automatic noise filtering of the peak list.
	"""

	noise_threshold: int = Integer.field(default=2)
	"""
	The minimum number of ions that must have intensities above the noise floor.
	Peaks with fewer such ions are excluded.
	"""

	# TODO: non-integer binned data
	base_peak_filter: Set[int] = attr.field(
			validator=attr.validators.instance_of(set),
			converter=set,
			default=attr.Factory(default_base_peak_filter),
			)
	"""
	Peaks whose base peak is at one of the listed masses (m/z) are excluded.
	"""

	rt_range: Optional[Tuple[float, float]] = attr.field(converter=convert_rt_range, default=None)
	"""
	Optional retention time range (in minutes) to filter the peak list to.
	"""
@_fix_init_annotations
@attr.define
class AlignmentMethod(Config):
	"""
	Parameters for peak alignment.
	"""

	rt_modulation: float = Number.field(default=2.5)
	"""
	Retention time tolerance parameter for pairwise alignments.
	"""

	gap_penalty: float = Number.field(default=0.3)
	"""
	Gap parameter for pairwise alignments.
	"""

	#: Minimum number of peaks required for the alignment position to survive filtering.
	#: If set to ``-1`` the number of repeats in the project are used.
	min_peaks: int = Integer.field(default=1)

	top_n_peaks: int = Integer.field(default=80)
	"""
	Number of peaks (starting with the largest) to include in the output.
	"""

	min_peak_area: float = Number.field(default=0.0)
	"""
	Minimum area of peaks to include in the output.
	"""
@_fix_init_annotations
@attr.define
class ConsolidateMethod(Config):
	"""
	Parameters for consolidation (finding the most likely identity for aligned peaks).

	:param min_appearances: Number of times the hit must appear across the individual aligned peaks.
		Consolidated peaks where the most common hit appears fewer times than this will be excluded.
		If set to ``-1`` the number of instances of the peak in the project are used.

	.. versionchanged:: 0.2.0  Added the ``min_appearances`` argument.
	"""

	#: List of glob-style matches for compound names.
	#: Consolidated peaks matching any of these will be excluded.
	name_filter: List[str] = attr.field(default=attr.Factory(list), converter=_to_list)

	#: Minimum average match factor.
	#: Consolidated peaks with an average match factor below this will be excluded.
	min_match_factor: int = Integer.field(default=600)

	min_appearances: int = Integer.field(default=-1)
	"""
	Number of times the hit must appear across the individual aligned peaks.

	Consolidated peaks where the most common hit appears fewer times than this will be excluded.
	If set to ``-1`` the number of instances of the peak in the project are used.

	.. versionadded:: 0.2.0
	"""
# target_range = 4.0,37.0
@_fix_init_annotations
@attr.define
class Method(Config):
	"""
	Overall GunShotMatch method.

	.. latex:vspace:: 4mm
	"""

	intensity_matrix: IntensityMatrixMethod = subtable_field(IntensityMatrixMethod)
	"""
	Method used for constructing an intensity matrix from a datafile.
	"""

	peak_detection: PeakDetectionMethod = subtable_field(PeakDetectionMethod)
	"""
	Method used for Biller-Biemann peak detection.
	"""

	peak_filter: PeakFilterMethod = subtable_field(PeakFilterMethod)
	"""
	Method used for peak filtering.
	"""

	alignment: AlignmentMethod = subtable_field(AlignmentMethod)
	"""
	Method used for peak alignment.
	"""

	consolidate: ConsolidateMethod = subtable_field(ConsolidateMethod)
	"""
	Method used for consolidation (finding most likely identity for aligned peaks).
	"""

	def to_toml(self) -> str:
		"""
		Serialise this :class:`~.Method` as a TOML string, under a top-level ``method`` table.
		"""

		document = {"method": self.to_dict()}
		return tomli_w.dumps(document)