Source code for lipyd.ms2

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `lipyd` python module
#
#  Copyright (c) 2015-2018 - EMBL
#
#  File author(s): Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GNU GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: http://www.ebi.ac.uk/~denes
#

from __future__ import print_function
from future.utils import iteritems
from past.builtins import xrange, range, reduce

import sys
import imp
import re
import math
import copy
import itertools
import collections
from argparse import Namespace
import numpy as np

from lipyd.common import *
import lipyd.mgf as mgf
import lipyd.mz as mzmod
import lipyd.session as session
import lipyd.settings as settings
import lipyd.lookup as lookup
import lipyd.fragdb as fragdb
import lipyd.moldb as moldb
import lipyd.lipproc as lipproc


ChainFragment = collections.namedtuple(
    'ChainFragment',
    ['c', 'u', 'fragtype', 'chaintype', 'i', 'intensity']
)


[docs]class MS2Identity(collections.namedtuple( 'MS2IdentityBase', [ 'score', 'max_score', 'score_pct', 'hg', 'chainsum', 'chains', 'chain_details', 'scan_details', ] )): def __new__( cls, score = 0, max_score = 0, score_pct = 0, hg = None, chainsum = None, chains = None, chain_details = None, scan_details = None, ): return super(MS2Identity, cls).__new__( cls, score, max_score, score_pct, hg, chainsum = chainsum, chains = chains, chain_details = chain_details, scan_details = scan_details, ) def __str__(self): return ( lipproc.full_str(self.hg, self.chains) if self.chains else lipproc.summary_str(self.hg, self.chainsum) )
[docs] def full_str(self): details = [] details.append('score=%.01f' % self.score_pct) if self.scan_details: if self.scan_details.deltart is not None: details.append('deltart=%.02f' % self.scan_details.deltart) if self.scan_details.sample_id is not None: sample_id = self.scan_details.sample_id if isinstance(sample_id, tuple): sample_id = ''.join(str(i) for i in sample_id) details.append('sample=%s' % sample_id) if self.scan_details.scan_id is not None: details.append('scan=%u' % self.scan_details.scan_id) return '%s[%s]' % ( self.__str__(), ','.join(details), )
[docs] def summary(self): return self.__str__(), self.score_pct
def __eq__(self, other): return ( isinstance(other, MS2Identity) and self.hg == other.hg and self.chainsum == other.chainsum and self.chains == other.chains )
ChainIdentificationDetails = collections.namedtuple( 'ChainIdentificationDetails', ['rank', 'i', 'fragtype'] ) ChainIdentificationDetails.__new__.__defaults__ = (None, None, None) ScanDetails = collections.namedtuple( 'ScanDetails', ['sample_id', 'scan_id', 'source', 'deltart'] ) ChainIdentificationDetails.__new__.__defaults__ = (None, None, None, None)
[docs]class mz_sorted(object): def __init__(self, scan): self.scan = scan def __enter__(self): self.scan.sort_mz() def __exit__(self, extyp, exval, tb): self.scan.sort_intensity()
[docs]class intensity_sorted(object): def __init__(self, scan): self.scan = scan def __enter__(self): self.scan.sort_intensity() def __exit__(self, extyp, exval, tb): self.scan.sort_mz()
[docs]class ScanBase(object): def __init__( self, mzs, ionmode, precursor = None, intensities = None, tolerance = None, scan_id = None, ): self.tolerance = tolerance or settings.get('ms2_tolerance') self.sorted_by = None self.mzs = mzs self.ionmode = ionmode self.adducts = {} self.intensities = ( np.array([1.0] * len(self.mzs)) if intensities is None else intensities ) self.precursor = precursor self.scan_id = scan_id if self.mzs is not np.ndarray: self.mzs = np.array(self.mzs) if self.intensities is not np.ndarray: self.intensities = np.array(self.intensities) self.annotate() self.normalize_intensities() with mz_sorted(self): self.iisort = np.argsort(self.intensities)[::-1] self.irank = np.arange(len(self.mzs)) self.imzsort = np.argsort(self.mzs) self.sorted_by = 'intensities'
[docs] def reload(self): modname = self.__class__.__module__ mod = __import__(modname, fromlist = [modname.split('.')[0]]) imp.reload(mod) new = getattr(mod, self.__class__.__name__) setattr(self, '__class__', new)
def __len__(self): return len(self.mzs)
[docs] def sort_mz(self): """ Sorts the scan by m/z values ascending. """ if self.sorted_by == 'mzs': return elif hasattr(self, 'imzsort') and self.sorted_by == 'intensities': self.sort(self.imzsort) else: isort = np.argsort(self.mzs) self.sort(isort) self.sorted_by = 'mzs'
[docs] def sort_intensity(self): """ Sorts the scan by intensities descending. """ if self.sorted_by == 'intensities': return elif hasattr(self, 'iisort') and self.sorted_by == 'mzs': self.sort(self.iisort) else: isort = np.argsort(self.intensities)[::-1] self.sort(isort) self.sorted_by = 'intensities'
[docs] def sort(self, isort): """ Applies sorted indices to the scan. """ self.intensities = self.intensities[isort] self.mzs = self.mzs[isort] for attr in ('irank', 'annot', 'inorm'): if hasattr(self, attr): setattr(self, attr, getattr(self, attr)[isort]) for ad, data in iteritems(self.adducts): data['annot'] = data['annot'][isort]
[docs] def annotate(self): """ Annotates the fragments in the scan with identities provided by the fragment database. """ self.annot = self.get_annot()
[docs] def get_annot(self, precursor = None, tolerance = None): """ Returns array of annotations. Makes it possible to use different precursor or tolerance. """ precursor = precursor or self.precursor tolerance = tolerance or self.tolerance annotator = fragdb.FragmentAnnotator( self.mzs, self.ionmode, precursor, tolerance = tolerance, ) return np.array(list(annotator)) # this is array
# only to be sortable
[docs] def normalize_intensities(self): """ Creates a vector of normalized intensities i.e. divides intensities by their maximum. """ self.imax = self.intensities.max() self.inorm = self.intensities / self.imax
[docs]class Scan(ScanBase): method_hg = { 'fa_neg_1': ('FA', ()), } def __init__( self, mzs, ionmode, precursor = None, intensities = None, ms1_records = None, scan_id = None, sample_id = None, source = None, deltart = None, logger = None, verbose = False, tolerance = None, ms1_tolerance = None, rt = None, ): ScanBase.__init__( self, mzs, ionmode, precursor, intensities, tolerance = tolerance, ) # get some settings self.ms1_tolerance = ms1_tolerance or settings.get('ms1_tolerance') self.check_ratio_g = settings.get( 'even_chain_fragment_intensity_ratios_gl_gpl' ) self.check_ratio_s = settings.get( 'even_chain_fragment_intensity_ratios_sl' ) self.iratio_logbase = settings.get( 'chain_fragment_instensity_ratios_logbase' ) self.chain_details = settings.get('ms2_scan_chain_details') if ms1_records is None and precursor is not None: # do the database lookup if not provided, # this is not efficient but makes possible # to easily use standalone `Scan` instances # for testing and such self.ms1_records = moldb.adduct_lookup( precursor, ionmode, tolerance = self.ms1_tolerance ) else: # even if precursor is None, we end up with an empty dict self.ms1_records = ms1_records or {} self.scan_id = scan_id self.sample_id = sample_id self.source = source self.deltart = deltart self.rt = rt self.log = logger self.verbose = verbose self.scan_details = ScanDetails( sample_id = self.sample_id, scan_id = self.scan_id, source = self.source, deltart = self.deltart, )
[docs] @classmethod def from_mgf( cls, fname, scan_id, ionmode, sample_id = None, precursor = None, mgf_charge = None, **kwargs ): mgfreader = mgf.MgfReader(fname, charge = mgf_charge) sc = mgfreader.scan_by_id(scan_id) precursor = precursor or mgfreader.precursor_by_id(scan_id) if sc is not None: return cls( mzs = sc[:,0], intensities = sc[:,1], ionmode = ionmode, precursor = precursor, scan_id = scan_id, sample_id = sample_id, source = fname, **kwargs )
[docs] def reload(self): modname = self.__class__.__module__ mod = __import__(modname, fromlist=[modname.split('.')[0]]) imp.reload(mod) new = getattr(mod, self.__class__.__name__) setattr(self, '__class__', new)
[docs] def print_scan(self): """ Prints the list of fragments as an annotated table. """ if self.log: self.log.msg(self.scan_str())
[docs] def show(self): """ Prints the scan table to standard output. """ sys.stdout.write(self.scan_str())
[docs] def scan_str(self): """ Returns the scan table as string. """ lindent = ' ' * 12 header = '%s\n%s%s\n' % ( ''.join(( lindent, 'Frag. m/z'.ljust(12), 'Intensity'.ljust(12), 'Identity'.ljust(36), 'NL mass'.rjust(12) )), lindent, '=' * 12 * 6 ) table = '\n'.join(( ''.join(( lindent, '%12.4f' % self.mz[i], '%10u' % self.intensities[i], ann.name, ( '%12.4f' % self.nl(mz[i]) if self.precursor else 'NA'.rjust(12) ) )) for i in xrange(len(self.mz)) for ann in ( self.annot[i] if self.annot[i] else (Namespace(name = 'Unknown'),) ) )) return '%s\n%s\n\n' % ( self.sample.__str__(), header, table )
[docs] def html_table(self): # TODO pass
[docs] def nl(self, mz, adduct = None): """ For m/z returns the corresponding neutral loss m/z. If precursor ion mass is unknown returns `numpy.nan`. """ if adduct is None: return self.precursor - mz if self.precursor else np.nan else: self.adduct(adduct) return self.adducts[adduct]['fake_precursor'] - mz
[docs] def full_list_str(self): """ Returns list of fragments as single string. """ return '; '.join( '/'.join( '%s (%u)' % ( ann.name, self.intensities[i] ) if ann is not None else 'Unknown (%.03f) (%u)' % ( self.mzs[i], self.intensities[i] ) ) for i in xrange(len(self)) for ann in ( self.annot[i] if self.annot[i] else (None,) ) )
[docs] def most_abundant_mz(self): """ Returns the m/z of the fragment with highest intensity. """ result = self.mzs[0] if self.verbose: self.log.msg('\t\t -- Most abundant m/z is %.03f' % result) return result
[docs] def mz_match(self, mz_detected, mz): """ Tests if two m/z's can be accepted to be equal. """ return lookup.match(mz_detected, mz, self.tolerance)
[docs] def mz_lookup(self, mz): """ Returns the index of the closest m/z value detected in the scan if it is within the range of tolerance, otherwise None. """ self.sort_mz() imz = lookup.find(self.mzs, mz, self.tolerance) i = self.imzsort[imz] if imz else None self.sort_intensity() return i
[docs] def has_mz(self, mz): """ Tells if an m/z exists in this scan. """ result = self.mz_lookup(mz) is not None if self.verbose: self.log.msg( '\t\t -- m/z %.03f occures in this scan? -- %s' % ( mz, str(result) ) ) return result
[docs] def has_nl(self, nl, adduct = None): """ Tells if a neutral loss exists in this scan. """ result = self.has_mz(self.nl(nl, adduct = adduct)) if self.verbose: self.feature.msg( '\t\t -- neutral loss of %.03f occures in ' 'this scan? Looked up m/z %.03f - %.03f = %.03f -- %s' % ( nl, self.precursor, nl, self.nl(nl, adduct = adduct), str(result) ) ) return result
[docs] def fragment_by_name(self, name, adduct = None): """ Returns the index of a fragment by its name. Returns `None` if the fragment does not exist in the scan. Returns `False` if the fragment name could not be found in the database. The lookup still goes by m/z, the name first looked up in the fragment database and the scan searched for the corresponding m/z. The name makes if obvious if this is a charged fragment or a neutral loss, hence it is not necessary to provide this information. Args ---- :param str name: Fragment full name as used in the database 2nd column, e.g. `PE [P+E] (140.0118)`. """ frag = fragdb.by_name(name, self.ionmode) if frag is not None: if frag[6] == 0: return self.nl_lookup(frag[0], adduct = adduct) else: return self.mz_lookup(frag[0]) return False
[docs] def has_fragment(self, name, adduct = None): """ Tells if a fragment exists in this scan by its name. Returns bool or `None` if fragment name could not be found in the database. """ i = self.fragment_by_name(name, adduct = adduct) return None if i is False else i is not None
[docs] def nl_lookup(self, nl, adduct = None): """ Looks up if a neutral loss exists in this scan and returns its index. """ return self.mz_lookup(self.nl(nl, adduct = adduct))
[docs] def most_abundant_fragment_is(self, name, adduct = None): """ Tells if the fragment name is the highest abundant. Returns `None` if the fragment name could not be found in the database. """ frag = fragdb.by_name(name, self.ionmode) if frag is not None: mz = ( self.nl(frag[0], adduct = adduct) if frag[6] == 0 else frag[0] ) return self.mz_match(self.mzs[0], mz)
[docs] def fragment_among_most_abundant(self, name, n = 2, adduct = None): """ Tells if the fragment is among the top `n`. """ frag = fragdb.by_name(name, self.ionmode) if frag is not None: mz = ( self.nl(frag[0], adduct = adduct) if frag[6] == 0 else frag[0] ) return self.mz_among_most_abundant(mz, n = n)
[docs] def fragment_percent_of_most_abundant( self, name, percent = 80.0, adduct = None, ): """ Tells if a fragment has at least certain percent of intensity compared to the highest peak. """ frag = fragdb.by_name(name, self.ionmode) if frag is not None: mz = ( self.nl(frag[0], adduct = adduct) if frag[6] == 0 else frag[0] ) return self.mz_percent_of_most_abundant(mz, percent = percent)
[docs] def most_abundant_mz_is(self, mz): """ Tells if the m/z with the highest intensity is `mz`. Returns `None` if the fragment name could not be found in the database. """ result = self.mz_match(self.most_abundant_mz(), mz) if self.verbose: self.log.msg( '\t\t -- Is m/z %.03f the most abundant one? -- %s' % ( mz, str(result) ) ) return result
[docs] def mz_among_most_abundant(self, mz, n = 2): """ Tells if an m/z is among the most aboundant `n` fragments in a spectrum. Args ---- :param float mz: The m/z value. :param int n: The number of most abundant fragments considered. """ self.sort_mz() i = lookup.find( self.mzs[self.irank < n], # intensity rank < n mz, self.tolerance ) self.sort_intensity() if self.verbose: self.log.msg( '\t\t -- m/z %.03f is among the %u most abundant? -- %s' % ( mz, n, str(i is not None) ) ) return i is not None
[docs] def nl_among_most_abundant(self, nl, n = 2, adduct = None): """ Tells if a neutral loss corresponds to one of the most aboundant `n` fragments in a spectrum. Args ---- :param float nl: The mass of the neutral loss. :param int n: The number of most abundant fragments considered. """ result = self.mz_among_most_abundant( self.nl(nl, adduct = adduct), n = n, ) if self.verbose: self.log.msg( '\t\t -- neutral loss %.03f is among ' 'the %u most abundant? -- %s' % ( nl, n, str(result) ) ) return result
[docs] def get_intensity(self, mz): """ Returns the relative intensity of a fragment ion from its m/z. Value is `None` if m/z does not present. """ i = self.mz_lookup(mz) if i is not None: return self.inorm[i] return None
[docs] def get_nl_intensity(self, nl, adduct = None): """ Returns the relative intensity of a neutral loss fragment ion. Value is `None` if neutral loss does not present. """ return self.get_intensity(self.nl(nl, adduct = adduct))
[docs] def mz_percent_of_most_abundant(self, mz, percent = 80.0): """ Tells if an m/z has at least certain percent of intensity compared to the most intensive fragment. Args ---- :param float mz: The m/z value. :param float percent: The threshold in percent of the highest intensity. """ i = self.get_intensity(mz) result = i and i >= percent / 100. if self.verbose: self.feature.msg( '\t\t -- m/z %.03f has abundance at least %.01f %% of' ' the highest abundance? -- %s\n' % ( mz, percent, str(result) ) ) return result
[docs] @classmethod def match_chtype(cls, value, accepted): """ Matches strings or strings to set of strings, optionally negative. Calls `match_chattr` with `typ = basestring`. """ return cls.match_chattr(value, accepted, typ = basestring)
[docs] @staticmethod def match_chattr(value, accepted, typ = int): """ Args ---- :param int value: The actual value. :param int,set accepted: A single value or a set of values to match against. Negative match is possible by providing a tuple with `False` as it's first element and the set of not acceptable values as the second element. """ return ( accepted is None or # simple match (isinstance(accepted, typ) and value == accepted) or ( not isinstance(accepted, typ) and ( # multiple values value in accepted or ( # negation hasattr(accepted, '__getitem__') and accepted[0] == False and value not in accepted[1] ) ) ) )
[docs] @classmethod def match_annot( cls, annot, frag_type = None, chain_type = None, c = None, u = None ): """ Tests a fragment annotation against criteria of fragment type, chain type, carbon count and unsaturation. """ return all(( cls.match_chattr(annot.fragtype, frag_type, typ = basestring), cls.match_chattr(annot.chaintype, chain_type, typ = basestring), cls.match_chattr(annot.c, c), cls.match_chattr(annot.u, u), ))
[docs] def highest_fragment_by_chain_type( self, head = None, frag_type = None, chain_type = None, c = None, u = None, adduct = None, ): """ Returns the highest instensity fragment matching a particular chain type. Returns fragment index or `None` if no such fragment exists. Arguments passed to `chain_fragment_type_is`. """ frags = self.fragments_by_chain_type( head = head, frag_type = frag_type, chain_type = chain_type, c = c, u = u, adduct = adduct, ) try: return next(frags) except StopIteration: return None
[docs] def fragments_by_chain_type( self, head = None, frag_type = None, chain_type = None, c = None, u = None, adduct = None, ): """ Collects fragments matching a particular chain type. Yields indices. Arguments passed to `chain_fragment_type_is`. """ head = len(self.mzs) if head is None else min(head, len(self.mzs)) for i in xrange(head): if self.chain_fragment_type_is( i, frag_type = frag_type, chain_type = chain_type, c = c, u = u, return_annot = False, adduct = adduct, ): yield i
[docs] def chain_fragment_type_among_most_abundant( self, n = 2, frag_type = None, chain_type = None, c = None, u = None, adduct = None, ): """ Tells if a particular type of aliphatic chain fragment can be found among the `n` highest intensity fragments. Arguments passed to `chain_fragment_type_is`. """ return bool(len(list( self.fragments_by_chain_type( head = n, frag_type = frag_type, chain_type = chain_type, c = c, u = u, adduct = adduct, ) )))
[docs] def chain_fragment_type_is( self, i, frag_type = None, chain_type = None, c = None, u = None, return_annot = False, adduct = None, ): """ Tells if an aliphatic chain fragment is a specified type. The type should be the string representation of the fragment, e.g. `FA-O` for fatty acid minus oxygen fragments. Returns bool or fragment annotations if `return_annot = True`. Args ---- :param int i: Index of the fragment. :param bool return_annot: Return iterator with the matching fragment annotations. """ if i >= len(self.mzs): return False annot = self.annot if adduct is None else self.adduct_annot(adduct) result = any(( self.match_annot(an, frag_type, chain_type, c, u) for an in annot[i] )) if self.verbose: criteria = [] if frag_type is not None: criteria.append('of type `%s`' % frag_type) if chain_type is not None: criteria.append('of chain type `%s`' % chain_type) if c is not None: criteria.append('with carbon count of %a' % c) if u is not None: criteria.append('with unsaturation of %a' % u) self.log.msg( '\t\t -- Fragment #%u (%.03f): ' 'is it a fragment %s? -- %s' % ( i, self.mz[i], ' and '.join(criteria), str(result) ) ) if return_annot: result = ( an for an in annot[i] if self.match_annot(an, frag_type, chain_type, c, u) ) return result
[docs] def chains_of_type( self, chain_type = None, frag_type = None, c = None, u = None, yield_annot = False, adduct = None, ): """ Iterates chain fragments matching certain criteria. Yields fragment indices or indices with annotations. Args ---- :param bool yield_annot: Yield tuples of indices and annotations instead of indices only. """ for i in xrange(len(self.mzs)): if self.chain_fragment_type_is( i = i, chain_type = chain_type, frag_type = frag_type, c = c, u = u, adduct = adduct, ): if yield_annot: for annot in self.chain_fragment_type_is( i = i, chain_type = chain_type, frag_type = frag_type, c = c, u = u, return_annot = True, adduct = adduct, ): yield i, annot else: yield i
[docs] def has_chain_fragment_type( self, chain_type = None, frag_type = None, c = None, u = None, adduct = None, ): """ Tells if at least one fragment matches certain criteria. Arguments passed to `chain_fragment_type_is`. """ return self.highest_fragment_by_chain_type( chain_type = chain_type, frag_type = frag_type, c = c, u = u, adduct = adduct, ) is not None
[docs] def matching_chain_combinations( self, record, head = None, intensity_threshold = None, expected_intensities = None, no_intensity_check = False, chain_param = (), adduct = None, ): """ Provides a way to see if specific chain combinations exist. The database record defines the chain layout of the molecule. Further arguments are passed to `chain_combinations`. The `chain_param` tuple contains dicts to match against chain fragments. All of these dicts must match at least one fragment identification. Only combinations matching all criteria yielded. Args ---- :param lipproc.LipidRecord record: A lipid database record matching the MS1 m/z. :param int head: Consider only the n most intensive fragments. :param float intensity_threshold: Consider only fragments with intensity higher than threshold. Relative to highest fragment, between 0 and 1. :param expected_intensities: See at `intensity_ratios`. :param bool no_intensity_check: Completely skip checking intensity ratios. :param tuple chain_param: Tuple of dicts. Each dict contains criteria for one chain moiety. Keys can be `chain_type`, `frag_type`, `c` and `u`. These can be single str or int values or sets of multiple values. If omitted or `None` any value will pass the filter. An empty tuple which is the default value will pass through everything, this is equivalent with calling `chain_combinations`. """ def match(key, param, value): return ( key not in param or param[key] is None or ( type(param[key]) in {int, str} and value == param[key] ) or ( type(param[key]) in {set, list, tuple} and value in param[key] ) ) if ( record.chainsum and len(record.chainsum) > 1 and len(chain_param) == 1 ): chain_param = chain_param * len(record.chainsum) for chains, details in self.chain_combinations( record, head = None, intensity_threshold = 0, expected_intensities = None, no_intensity_check = False, frag_types = None, fragment_details = True, adduct = adduct, ): if ( not chain_param or all(( not param or any(( all(( match('chain_type', param, ch.typ), match('frag_type', param, details.fragtype[i]), match('c', param, ch.c), match('u', param, ch.u), )) for i, ch in enumerate(chains) )) for param in chain_param )) ): yield chains, details
[docs] def has_chain_combination( self, record, head = None, intensity_threshold = None, expected_intensities = None, no_intensity_check = False, chain_param = (), adduct = None, ): """ Tells if a certain chain combination exists in the scan. Arguments passed to `matching_chain_combinations`. """ ccomb = self.matching_chain_combinations( record = record, head = head, intensity_threshold = intensity_threshold, expected_intensities = expected_intensities, no_intensity_check = no_intensity_check, chain_param = chain_param, adduct = adduct, ) try: _ = next(ccomb) return True except StopIteration: return False
def _matching_chain_pairs( self, record, chain_type = None, frag_type = None, c = None, u = None, partner_chain_types = None, partner_frag_types = None, count_only = False, adduct = None, ): # small caching of constraint matching type_pos = {} def get_positions(self, frag_type): if frag_type not in type_pos: type_pos[frag_type] = self.positions_for_frag_type( record, frag_type ) return type_pos[frag_type] # ## for i, iannot in self.chains_of_type( chain_type = chain_type, frag_type = frag_type, c = c, u = u, yield_annot = True, adduct = adduct, ): partner_c = record.chainsum.c - annot.c partner_u = record.chainsum.u - annot.u if partner_c < 1 or partner_u < 0: continue pos_i = get_positions(iannot.fragtype) for j, jannot in self.chains_of_type( c = partner_c, u = partner_u, yield_annot = True, adduct = adduct, ): if ( partner_chain_types is None or jannot.chaintype in partner_chain_types ) and ( partner_frag_types is None or jannot.fragtype in partner_frag_types ): pos_j = get_positions(jannot.fragtype) if ( not pos_i or not pos_j or ( len(pos_i) == 1 and len(pos_j) == 1 and not pos_i - pos_j ) ): continue yield ( lipproc.Chain( ) )
[docs] def positions_for_frag_type(self, record, frag_type): """ Returns the possible chain positions for a record and a fragment type. """ # constraints for the fragment type constr = fragdb.constraints(frag_type, self.ionmode) # set of possible positions of the chain # which this fragment originates from return lipproc.match_constraints(record, constr)[1]
[docs] def is_chain(self, i, adduct = None): """ Examines if a fragment has an aliphatic chain. """ annot = self.adduct_annot(adduct) result = any(not np.isnan(an.c) for an in annot[i]) if self.verbose: self.log.msg( '\t\t -- Fragment #%u (%.03f)' 'has an aliphatic chain? -- %s' % ( i, self.mzs[i], str(result) ) ) return result
[docs] def is_chain_type(self, i, typ = 'FA', adduct = None): """ Checks if a fragment might origin from a certain aliphatic chain type (e.g. `FA` -- fatty acyl, `FAL` -- fatty alkyl, `Sph` -- sphingosin base). """ return self.chain_fragment_type_is( i, chain_type = typ, adduct = adduct )
[docs] def is_fa(self, i, adduct = None): """ Tells if a fragment origins from a fatty acyl moiety. """ return self.is_chain_type(i, adduct = adduct)
[docs] def is_fal(self, i, adduct = None): """ Tells if a fragment origins from a fatty alkyl moiety. """ return self.is_chain_type(i, 'FAL', adduct = adduct)
[docs] def is_sph(self, i, adduct = None): """ Tells if a fragment origins from a shpingosin backbone. """ return self.is_chain_type(i, 'Sph', adduct = adduct)
[docs] def is_type(self, i, typ, adduct = None): """ Tells if a fragment is a certain type. """ return self.chain_fragment_type_is( i, frag_type = typ, adduct = adduct )
[docs] def annot_by_type( self, i, chain_type = None, frag_type = None, adduct = None, ): """ Returns the annotations matching certain types. """ annot = self.adduct_annot(adduct) return tuple( an for an in annot[i] if ( self.match_chtype(an.chaintype, chain_type) and self.match_chtype(an.fragtype, frag_type) ) )
[docs] def cu_by_type( self, i, chain_type = None, frag_type = None, adduct = None, ): """ Returns `(carbon count, unsaturation)` tuples for fragment `i` considering only the the requested chain types and fragment types. """ return tuple( (a.c, a.u) for a in self.annot_by_type( i, chain_type = chain_type, frag_type = frag_type, adduct = adduct, ) )
def _build_chain_list(self, annot = None): """ Builds a list of chains which facilitates the anlysis of chain combinations. """ annot = annot if type(annot) is np.ndarray else self.annot return tuple( ChainFragment( a.c, a.u, a.fragtype, a.chaintype, i, self.intensities[i] ) for i, aa in enumerate(annot) for a in aa if a.c and not np.isnan(a.c) )
[docs] def build_chain_list(self, rebuild = False): if ( not rebuild and hasattr(self, 'chain_list') ): return self.chain_list = self._build_chain_list()
[docs] def chain_among_most_abundant( self, head = 1, chain_type = None, frag_type = None, c = None, u = None, min_mass = None, skip_non_chains = False, adduct = None, ): """ Returns `True` if the defined type of chain fragment can be found among the most abundant fragments. """ if self.verbose: self.log.msg( '\t\t -- Checking for certain type of chain among the top ' '%u fragments.' % head ) result = any(( self.chain_fragment_type_is( i, frag_type = frag_type, chain_type = chain_type, c = c, u = u, adduct = adduct, ) for i in ( xrange(head) if not skip_non_chains else itertools.islice( ( i for i in xrange(len(self.mzs)) if ( not skip_non_chains or self.is_chain(i) ) and ( min_mass is None or self.mzs[i] >= min_mass ) ), head ) ) )) if self.verbose: self.log.msg( '\t\t -- Checked certain type of chain among the top ' '%u fragments. -- %s' % (head, str(result)) ) return result
[docs] def get_most_abundant_chain( self, head = 1, frag_type = None, chain_type = None, c = None, u = None, adduct = None, ): """ Looks up the most abundant fatty acid fragment of the given type. Returns the fragment index. """ for i in xrange(len(self)): if self.chain_fragment_type_is( i, frag_type = frag_type, chain_type = chain_type, c = c, u = u, adduct = adduct, ): return i
[docs] def chain_percent_of_most_abundant( self, percent, frag_type = None, chain_type = None, c = None, u = None, adduct = None, ): """ Tells if a certain chain present with an abundance at least the given percent of the most abundant fragment. Args ---- :param float percent: Percentage, between 0 and 100. """ result = any(( self.chain_among_most_abundant( i, frag_type = frag_type, chain_type = chain_type, c = c, u = u, adduct = adduct, ) for i in itertools.takewhile( lambda i: self.inorm[i] > percent / 100.0, xrange(len(self.mzs)) ) )) return result
[docs] def mz_most_abundant_fold(self, mz, fold): """ Tells if an m/z is the most abundant fragment and it has at least a certain fold higher intensity than any other fragment. Args ---- :param float mz: The m/z value. :param float fold: The m/z must be this times higher than any other. """ result = ( self.most_abundant_mz_is(mz) and ( len(self.mzs) == 1 or self.intensities[1] * fold <= self.intensities[0] ) ) if self.verbose: self.log.msg( '\t\t -- m/z %.03f is at least %u times higher than ' 'any other? -- %s\n' % (mz, fold, str(result)) ) return result
[docs] def cer_fa_test(self, i_fa, i_sph, adduct = None): return ( self.chain_fragment_type_is( i_fa, frag_type = 'FA+C2+NH2-O', adduct = adduct, ) and self.chain_fragment_type_id( i_sph, frag_type = 'Sph-C2H4-NH2-H2O', adduct = adduct, ) and self.intensities[i_fa] > self.intensities[i_sph] * 2 )
[docs] def has_chain_combinations(self, rec, adduct = None, **kwargs): """ Calls `chain_combinations` only to check if at least one conbination explicitely confirmed. """ ccomb = self.chain_combinations(rec, adduct = adduct, **kwargs) try: _ = next(ccomb) return True except StopIteration: return False
[docs] def chain_combinations( self, rec, head = None, intensity_threshold = 0, expected_intensities = None, no_intensity_check = False, frag_types = None, fragment_details = None, adduct = None, ): """ Finds all combinations of chain derived fragments matching the total carbon count and unsaturation in a database record. Yields tuple of chains (`lipproc.Chain` objects). Arguments not listed here explained at `frags_for_positions`. Args ---- :param lipproc.LipidRecord rec: The database record to match against. :param bool no_intensity_check: Completely skip checking intensity ratios. :param float intensity_threshold: Only fragments with intensities above this threshold will be considered. Intensities relative to the highest, between 0 and 1. :param tuple frag_types: See at `frags_for_positions`. """ if not rec.chainsum and not rec.chains: return self.build_chain_list() chainsum = rec.chainsum or lipproc.sum_chains(rec.chains) frags_for_position = self.frags_for_positions( rec, head = head, intensity_threshold = intensity_threshold, frag_types = frag_types, adduct = adduct, ) if len(frags_for_position) != len(rec.chainsum.typ): # if one or more chains have no corresponding fragment # we do not yield anything; # for finding missing those chains `missing_chains` # can be used return # iterate all combinations for frag_comb in itertools.product( *( # making a sorted list of lists from the dict i[1] for i in sorted(frags_for_position.items(), key = lambda i: i[0]) ) ): if ( sum(frag.c for frag in frag_comb) == chainsum.c and sum(frag.u for frag in frag_comb) == chainsum.u ): if ( # bypass intensity check no_intensity_check or self._intensity_check( frag_comb, chainsum, expected_intensities ) ): # now all conditions satisfied: yield self._chains_frag_comb( frag_comb, chainsum, details = fragment_details )
[docs] def frags_for_positions( self, rec, head = None, intensity_threshold = 0, frag_types = None, adduct = None, ): """ Returns the possible fragments for each positions (sn1, sn2 in glycerophospholipids, sphingosine base and N-acyl in sphingolipids, etc). :param int head: If `None` or `numpy.inf` all fragment ions will be considered, otherwise only the first most aboundant until the number `head`. :param float intensity_threshold: Only fragments with intensities above this threshold will be considered. Intensities relative to the highest, between 0 and 1. :param tuple frag_types: Limit the query to certain fragment types in addition to built in fragment constraints and other criteria. A tuple of tuples with fragment type names can be provided each for one position with None values where default fragment types should be used. E.g. `(('FA_mH', 'Lyso_PA'), None)` means the chain in first position might be found as fatty acid minus hydrogen fragment or lysophosphatidic acid fragment, while the second position could be anything allowed by the built in constraints. """ frags_for_position = collections.defaultdict(list) chain_list = self.adduct_chain_list(adduct) for frag in chain_list: if ( (head and frag.i >= head) or self.inorm[frag.i] < intensity_threshold ): break chpos = self.positions_for_frag_type(rec, frag.fragtype) for ci in chpos: if ( # frag_types constraints not frag_types or not frag_types[ci] or frag.fragtype in frag_types[ci] ): frags_for_position[ci].append(frag) return dict(frags_for_position)
[docs] def intensity_ratios( self, intensities, frag_indices = None, expected = None, logbase = None, ): """ Tells if the ratio of a list of intensities fits the one in `expected` or is more or less even if `expected` is `None`. :param list intensities: List of intensities. :param list expected: List with expected intensity proportions. E.g. `[1, 1, 2]` means the third ion is twice higher intense than the 2 others. :param int logbase: The fold difference tolerance when comparing intensities. E.g. if this is 2, then an almost twice less or more intense ion will considered to have similar intensity. """ logbase = settings.get('chain_fragment_instensity_ratios_logbase') if len(intensities) == 1: return True if any(i <= 0.0 for i in intensities): raise ValueError( 'Negative intensity value encountered' '(sample=%s, ion mode=%s, scan=%u)' % ( str(self.sample), self.ionmode, self.scan_id ) ) frag_indices = frag_indices or list(range(len(intensities))) # to know if one fragment contributes more than one times; # intensities divided by the times the fragment is incident cntr = collections.Counter(frag_indices) # by default expecting more or less equal intensities if expected is None: expected = [1.0] * len(intensities) # intensities corrected by the expected and the counts intcorr = [ ins / expected[i] / cntr[ind] for (i, ins), ind in zip(enumerate(intensities), frag_indices) ] return ( all(( math.log(co[0], logbase) - math.log(co[1], logbase) <= 1 for co in itertools.combinations(intcorr, 2) )) )
def _intensity_check( self, frag_comb, chainsum, expected_intensities = None ): """ Performs the chain intensity ratio check according to settings. """ return ( not ( # need to check intensity ratios (chainsum.typ[0] == 'Sph' and self.check_ratio_s) or (chainsum.typ[0] != 'Sph' and self.check_ratio_g) or expected_intensities ) or self.intensity_ratios( # intensity ratios are ok intensities = tuple(f.intensity for f in frag_comb), frag_indices = tuple(f.i for f in frag_comb), expected = expected_intensities, logbase = self.iratio_logbase ) ) def _chains_frag_comb( self, frag_comb, chainsum, details = None, missing_position = None, missing_chain = None, ): """ Returns a tuple of chains from a fragment annotation combination and a database record chain summary object. Potentially includes a missing chain which does not yield any fragment. """ # boolean: whether we provide details or not details = self.chain_details if details is None else details return ( tuple( lipproc.Chain( c = frag_comb[ifrag].c, u = frag_comb[ifrag].u, typ = frag_comb[ifrag].chaintype, attr = lipproc.ChainAttr( # take the sphingosine base type # from the chainsum of the record sph = chainsum.attr[ichain].sph, ether = frag_comb[ifrag].chaintype == 'FAL', oh = chainsum.attr[ichain].oh ) ) if ifrag is not None else missing_chain # chain indices and fragment indices for ichain, ifrag in iterator_insert( len(chainsum), missing_position, ) ), ChainIdentificationDetails( rank = tuple( frag_comb[ifrag].i if ifrag is not None else None for ichain, ifrag in iterator_insert( len(chainsum), missing_position, ) ), i = tuple( self.inorm[frag_comb[ifrag].i] if ifrag is not None else None for ichain, ifrag in iterator_insert( len(chainsum), missing_position, ) ), fragtype = tuple( frag_comb[ifrag].fragtype if ifrag is not None else None for ichain, ifrag in iterator_insert( len(chainsum), missing_position, ) ), ) if details else None )
[docs] def missing_chain( self, rec, missing_position = 1, head = None, intensity_threshold = 0, expected_intensities = None, no_intensity_check = False, frag_types = None, adduct = None, ): """ Finds ''missing'' chains i.e. which could complement the chains identified among the fragments to fit the species in the record. Yields tuples with first element a tuple of identified chains and as second element the missing chain. Works a similar way to `chain_combinations`. Args ---- :param int missing_position: Position of the missing chain. 0, 1, 2 are sn1, sn2 and sn3 positions on glycerol, 0 and 1 are sphingosine base and N-acyl in sphingolipids, respectively. By default is 1 (sn2 or N-acyl). """ chain_list = self.adduct_chain_list(adduct = adduct) chainsum = rec.chainsum or lipproc.sum_chains(rec.chains) frags_for_position = self.frags_for_positions( rec, head = head, intensity_threshold = intensity_threshold, frag_types = frag_types, adduct = adduct, ) if missing_position >= len(rec.chainsum.typ): raise ValueError( 'No chain known at position %u' % missing_position ) if missing_position in frags_for_position: chains_at_missing = frags_for_position[missing_position] del frags_for_position[missing_position] else: chains_at_missing = [] # iterate all combinations for frag_comb in itertools.product( *( # making a sorted list of lists from the dict i[1] for i in sorted(iteritems(frags_for_position), key = lambda i: i[0]) ) ): # if more than one chain missing if len(rec.chainsum) - len(frag_comb) > 1: continue missing_c = chainsum.c - sum(frag.c for frag in frag_comb) missing_u = chainsum.u - sum(frag.u for frag in frag_comb) # do not yield impossible values if missing_c < 1 or missing_u < 0 or missing_u > missing_c - 1: continue if ( # bypass intensity check no_intensity_check or self._intensity_check( frag_comb, chainsum, expected_intensities ) ): missing_chain = lipproc.Chain( c = missing_c, u = missing_u, typ = chainsum.typ[missing_position], attr = chainsum.attr[missing_position] ) # now all conditions satisfied: yield self._chains_frag_comb( frag_comb, chainsum, missing_position = missing_position, missing_chain = missing_chain, )
[docs] def cu_complete(self, chainsum, chain = None, c = None, u = None): """ Returns the carbon count and unsaturation needed to complete the `chain` or `c` and `u` to fit the `chainsum`. Returns tuple of c and u. """ c = c or chain.c u = u or chain.u return chainsum.c - c, chainsum.u - u
[docs] def iterrecords(self, adducts = None): """ Iterates MS1 records. Yields tuple of adduct type and record. """ for add, recs in iteritems(self.ms1_records): if adducts is None or add in adducts: for rec in recs[1]: yield add, rec
[docs] def records_by_type(self, headgroup, sub = (), adducts = None): """ Iterates MS1 database records with a certain headgroup and subtype. """ sub = ( sub if type (sub) is set else set(sub) if type(sub) is list or type(sub) is tuple else set([sub]) ) for add, rec in self.iterrecords(adducts = adducts): if rec.hg and rec.hg.main == headgroup and set(rec.hg.sub) == sub: yield rec
[docs] def first_record(self, headgroup, sub = (), adducts = None): """ Returns the first MS1 database record matching headgroup and subtype. """ recbytyp = self.records_by_type( headgroup, sub = sub, adducts = adducts ) try: return next(recbytyp) except StopIteration: return None
[docs] def identify(self, adducts = None): result = {} for add, rec in self.iterrecords(adducts): if rec.hg is None: continue rec_str = rec.summary_str() if rec_str not in result and rec.hg in idmethods[self.ionmode]: method = idmethods[self.ionmode][rec.hg] adduct = None if add in {'[M+H]+', '[M-H]-'} else add result[rec_str] = tuple( method( record = rec, scan = self, adduct = adduct, ).identify() ) return result
# # Sphingolipids #
[docs] def cer1p_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Ceramide-1-phosphate. **Specimen:** - GLTPD1 - 616.47 **Principle:** - The most abundant fragment is 78.9591 metaphosphate. - If 96.9696 phosphate present adds to the score. """ score = 0 fattya = set([]) if self.most_abundant_mz_is(78.95905658): score += 5 if self.has_mz(96.96962158): score += 1 return {'score': score, 'fattya': fattya}
[docs] def hexcer_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Hexosyl-Ceramide. **Specimen:** - GLTP - 744.5627 **Principle:** - Hexose fragments 71.0115, 89.0220 and 101.0219 must present. """ score = 0 fattya = set([]) if all(map(lambda mz: self.mz_among_most_abundant(mz, n = 10), # these are 3 fragments found at GLTP [71.0115000, 89.0220000, 101.021900])): score += 5 return {'score': score, 'fattya': fattya}
[docs] def hexceroh_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Hexosyl-Ceramide-OH ('t'). This method is the same as `hexcer_neg_1`. **Specimen:** - GLTP - 760.557 **Principle:** - Hexose fragments 71.0115, 89.0220 and 101.0219 must present. """ return self.hexcer_neg_1()
[docs] def sm_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Sphingomyeline. **Specimen:** - GLTPD1 - 745.55 **Principle:** - Must have a neutral loss of CH3+COOH (60.0211). - Phosphate+choline-CH3 fragment 168.0431 must be present. """ score = 0 fattya = set([]) if self.mz_among_most_abundant(168.0431206) and self.has_nl(60.02113): score += 5 return {'score': score, 'fattya': fattya}
[docs] def sph1p_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Spingosine-1-phosphate. **Specimen:** - Only observed in standard. **Principle:** - Phosphate 78.9590 must be present. """ score = 0 fattya = set([]) if self.has_mz(78.95905658): score += 5 return {'score': score, 'fattya': fattya}
[docs] def cer_neg_1(self): """ Examines if a negative mode MS2 spectrum is Ceramide. **Specimen:** - SEC14L1 - 582.509 **Principle:** - A Ceramide backbone fragment must be among the 2 most abundant. - Ceramide backbone fragments lighter by N or C2N but same carbon count and unsaturation add to the score. """ score = 0 fattya = set([]) if self.fa_among_most_abundant('CerFA', n = 2): score += 5 fattya = self.fa_combinations('Cer', sphingo = True) fa_h_ccs = self.matching_fa_frags_of_type('Cer', 'CerFA(') for fa_h_cc in fa_h_ccs: for fa_other in [ '[CerFA-N(C%u:%u)-]-', '[CerFA-C2N(C%u:%u)-]-']: if self.frag_name_present(fa_other % fa_h_cc): score += 1 return {'score': score, 'fattya': fattya}
[docs] def cerp_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Ceramide-1-phosphate. Gives similar result as Sphingosine-1-phosphate. **Specimen:** - GLTPD1 - 616.47 **Principle:** - The most abundant fragment must be 78.9591 metaphosphate. - Presence of 96.9696 phosphate increase the score. """ score = 0 fattya = set([]) if self.most_abundant_mz_is(78.95905658): score += 5 if self.has_mz(96.96962158): score += 1 return {'score': score, 'fattya': fattya}
[docs] def cerp_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Ceramide-1-phosphate. **Specimen:** - GLTPD1 + 728.59, 590.45, 702.58, 618.430, 616.415, 640.409 **Principle:** - A sphingosine fragment with double H2O loss must be among the three highest abundant fragments. """ score = 0 fattya = set([]) if self.fa_among_most_abundant('-H2O-H2O+]+', n = 3, sphingo = True): score += 1 return {'score': score, 'fattya': fattya}
[docs] def adduct(self, adduct, reset = False): """ Creates a copy of the current scan assuming that the precursor is a certain adduct. The precursor will be converted to [M+H]+ or [M-H]- adduct and neutral losses will be calculated accordingly. """ if adduct in self.adducts and not reset: return ad2ex = settings.get('ad2ex')[1][self.ionmode][adduct] ex2ad = 'remove_h' if self.ionmode == 'neg' else 'add_h' fake_precursor = ( getattr( mzmod.Mz( getattr( mzmod.Mz(self.precursor), ad2ex )() ), ex2ad )() ) annot = self.get_annot(fake_precursor) chain_list = self._build_chain_list(annot = annot) self.adducts[adduct] = { 'fake_precursor': fake_precursor, 'annot': annot, 'chain_list': chain_list, }
[docs] def adduct_annot(self, adduct = None): """ Gets the annotations for a certain adduct. """ return self.adduct_data('annot', adduct = adduct)
[docs] def adduct_chain_list(self, adduct = None): """ Gets the chain list for a certain adduct. """ return self.adduct_data('chain_list', adduct = adduct)
[docs] def adduct_data(self, name, adduct = None): if adduct is None: return getattr(self, name) self.adduct(adduct) return self.adducts[adduct][name]
[docs] def get_ms1_records( self, hg, subtype = None, sph = None, ether = None, oh = None, databases = None, ): """ Iterates MS1 records for a given type. Yields tuples of record, adduct type and error in ppm """ subtype = subtype or () hg = ( hg if isinstance(hg, lipproc.Headgroup) else lipproc.Headgroup(main = hg, sub = subtype) ) for add, recs in iteritems(self.ms1_records): for rec_mz, rec, err_ppm in zip(*recs): if ( rec.hg == hg and ( databases is None or rec.lab.db in databases ) and ( sph is None or rec.chainsum.attr.sph == sph ) and ( ether is None or rec.chainsum.attr.ether == ether ) and ( oh is None or rec.chainsum.attr.oh == oh ) ): yield rec, add, err_ppm
[docs]class AbstractMS2Identifier(object): class_methods = {} subclass_methods = {} def __init__( self, record, scan, adduct = None, missing_chains = None, explicit_and_implicit = False, must_have_chains = True, chain_comb_args = {}, missing_chain_args = {}, ): self.score = 0 self.max_score = 0 self.rec = record self.scn = scan self.add = adduct self.missing_chains = ( missing_chains if missing_chains is not None else tuple(range(len(record.chainsum))) # any chain can be missing ) self.chain_comb_args = chain_comb_args self.missing_chain_args = missing_chain_args or self.chain_comb_args self.explicit_and_implicit = explicit_and_implicit self.must_have_chains = must_have_chains self.scores = {}
[docs] def identify(self): if not self.rec.hg: return self.confirm_class() chains_confirmed = False for chains in self.confirm_chains_explicit(): yield MS2Identity( max(self.score, 0), self.max_score, self.percent_score(), self.rec.hg, self.rec.chainsum, chains = chains[0], chain_details = chains[1], scan_details = self.scn.scan_details, ) chains_confirmed = True if not chains_confirmed or self.explicit_and_implicit: for chains in self.confirm_chains_implicit(): yield MS2Identity( max(self.score, 0), self.max_score, self.percent_score(), self.rec.hg, self.rec.chainsum, chains = chains[0], chain_details = chains[1], scan_details = self.scn.scan_details, ) chains_confirmed = True if not chains_confirmed and not self.must_have_chains and self.score: yield MS2Identity( max(self.score, 0), self.max_score, self.percent_score(), self.rec.hg, self.rec.chainsum, chains = None, chain_details = None, scan_details = self.scn.scan_details, )
[docs] def percent_score(self): """ Returns the score as a percentage of the maximum possible score. Zero maximum score means something is wrong, then it returns 200. """ return ( max(int(np.round(self.score / self.max_score * 100.)), 0) if self.max_score else 200 )
[docs] def confirm_class(self): """ In this base class pass through everything. Most of the subclasses should override this. """ self.score = 0 if self.rec.hg is not None and self.rec.hg.main in self.class_methods: score, max_score = getattr( self, self.class_methods[self.rec.hg.main] )() self.score += score self.max_score += max_score
[docs] def confirm_subclass(self): subclasses = self.rec.hg.sub or ('empty',) if self.rec.hg is not None: for sub in subclasses: if sub not in self.scores and sub in self.subclass_methods: score, max_score = getattr( self, self.subclass_methods[sub] )() self.scores[sub] = score self.score += score self.max_score += max_score
[docs] def confirm_chains_explicit(self): return self.scn.chain_combinations(self.rec, **self.chain_comb_args)
[docs] def confirm_chains_implicit(self): for missing in self.missing_chains: for chain_comb in self.scn.missing_chain( self.rec, missing_position = missing, **self.missing_chain_args ): yield chain_comb
[docs] def matching_chain_combinations( self, chain_param1, chain_param2, score_method = lambda ccomb: (min(ccomb, 3) * 2, 6), ): ccomb = len(list( self.scn.matching_chain_combinations( self.rec, chain_param = (chain_param1, chain_param2), ) )) score, max_score = score_method(ccomb) self.score += score self.max_score += score
[docs] def check_lyso(self, score_threshold = 5): """ Checks whether the this mass has been identified in the database as a lyso species and calls the corresponding lyso identification method. Returns ``True`` if the score from the lyso is larger than ``score_threshold``. """ rec_lyso = self.scn.first_record(self.rec.hg.main, sub = ('Lyso',)) if rec_lyso: lyso_hg = lipproc.Headgroup( main = self.rec.hg.main, sub = ('Lyso',), ) lyso = idmethods[self.scn.ionmode][lyso_hg](rec_lyso, self.scn) lyso.confirm_class() return lyso.score > score_threshold return False
# # Lipid identification methods # # # Fatty acids #
[docs]class FA_Negative(AbstractMS2Identifier): """ Examines if a negative mode MS2 spectrum is a fatty acid. Here we only check if the most abundant fragment is the fatty acid itself. **Specimen:** - in vitro FABP1 - **Principle:** - The most abundant fragment must be a fatty acid which matches the carbon count and the unsaturation of the whole molecule. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = { 'head': 1, 'frag_types': { 0: {'FA-H'} } }, **kwargs, )
[docs] def confirm_class(self): self.max_score = 10 if ( self.rec.chainsum and self.scn.chain_among_most_abundant( frag_type = 'FA-H', c = self.rec.chainsum.c, u = self.rec.chainsum.u, ) ): self.score = 10
[docs]class FA_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a fatty acid. Here we only check if the most abundant fragment is the fatty acid itself. **Specimen:** - Not known **Principle:** - The most abundant fragment must be a fatty acid which matches the carbon count and the unsaturation of the whole molecule. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = { 'head': 1 } )
[docs] def confirm_class(self): self.max_score = 10 if ( self.rec.chainsum and self.scn.chain_among_most_abundant( frag_type = 'FA+H', c = self.rec.chainsum.c, u = self.rec.chainsum.u, ) ): self.score = 10
# # Glycerolipids #
[docs]class DAG_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a DAG. **Specimen:** - in vivo: SEC14L2 + 584.52 - in vitro: BNIP2 + 770.67 **Principle:** - Combination of fatty acid fragments among the 10 most abundant fragments must match the expected carbon count and unsaturation. - If these are among the 5 highest fragments the score is higher. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score = 6 if self.scn.has_chain_combinations(self.rec, head = 10): self.score += 4 if self.scn.has_chain_combinations(self.rec, head = 6): self.score += 2
[docs]class DAG_Negative(AbstractMS2Identifier): """ Examines if a negative mode MS2 spectrum is a DAG. **Specimen:** - We don't have yet. **Principle:** - Combination of fatty acid fragments among the 10 most abundant fragments must match the expected carbon count and unsaturation. - If these are among the 5 highest fragments the score is higher. (Same as in positive ionmode.) """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score = 6 if self.scn.has_chain_combinations(self.rec, head = 10): self.score += 4 if self.scn.has_chain_combinations(self.rec, head = 6): self.score += 2
[docs]class TAG_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a TAG. **Specimen:** - STARD11 + 818.7187 **Principle:** - Combination of fatty acid fragments must match the expected carbon count and unsaturation. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score = 10 if self.scn.has_chain_combinations(self.rec, head = 15): self.score += 5 if self.scn.has_chain_combinations(self.rec, head = 7): self.score += 5
[docs]class TAG_Negative(AbstractMS2Identifier): """ Examines if a negative mode MS2 spectrum is a TAG. **Specimen:** - We don't have yet. **Principle:** - Combination of fatty acid fragments must match the expected carbon count and unsaturation. (Same as in positive ionmode.) """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.score = 0 self.max_score = 5 if self.scn.has_chain_combinations(self.rec): self.score += 5
[docs]class GL_Positive(AbstractMS2Identifier): """ Generic class for identification of glycerolipids. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, ) class_methods = { 'DGTS': 'dgts', 'DGCC': 'dgcc', 'DGTA': 'dgts', 'DGDG': 'dgdg', 'MGDG': 'mgdg', 'SQDG': 'sqdg', }
[docs] def confirm_class(self): if self.rec.hg.main in self.class_methods: getattr(self, self.class_methods[self.rec.hg.main])()
[docs] def dgts(self): self.max_score += 30 if self.scn.has_fragment('DGTS [G+TS] (236.1492)'): self.score += 10 self.score += sum(map(bool, ( self.scn.has_fragment('DGTS [TS] (144.1019)'), self.scn.has_chain_fragment_type('NL FA-H2O'), ) )) * 10
[docs] def dgcc(self): self.max_score += 20 if self.scn.has_fragment('PC/SM [Ch+H2O] (104.107)'): self.score += 10 if self.scn.has_fragment('DGCC [C2+Ch] (132.1388)'): self.score += 10
[docs] def sqdg(self): self.max_score += 10 if self.scn.has_fragment('NL [Hexose+SO3+H2O+H] (NL 261.0280)'): self.score += 10
[docs] def mgdg(self): self.max_score += 10 if self.scn.has_fragment('[Hexose+H2O-H] (NL 197.07)'): self.score += 10
[docs] def dgdg(self): self.max_score += 10 if self.scn.has_fragment('NL [2xHexose+H2O-H] (NL 359.1190)'): self.score += 10
[docs]class GL_Negative(AbstractMS2Identifier): """ Generic class for identification of glycerolipids. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score += 10 if self.scn.has_chain_combination( record = self.rec, chain_param = ( {'frag_type': {'FA-H', 'FA-'}}, ) ): self.score += 10
# # Glycerophospholipids #
[docs]class PE_Negative(AbstractMS2Identifier): """ Examines if a negative mode MS2 spectrum is Phosphatidylethanolamine. **Specimen:** - GM2A - 714.507 and 716.523 **Principle:** - The most abundant fragment is a fatty acid [M-H]- ion. - 140.0118 PE headgroup must be present. - Other headgroup ions 196.0380 and 178.0275 add to the score. - Lyso-PE and [M-H-CO2]- fatty acid fragments complementing the highest [M-H]- fatty acid increase the score. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score += 11 if ( self.scn.chain_fragment_type_is( i = 0, chain_type = 'FA', frag_type = 'FA-H' ) and self.scn.has_fragment('PE [P+E] (140.0118)') ): self.score += 5 self.score += sum(map(bool, ( self.scn.has_fragment('PE [G+P+E-H2O] (196.0380)'), self.scn.has_fragment('PE [G+P+E] (178.0275)'), ))) * 3 # by default this returns max 6 self.matching_chain_combinations( {'frag_type': 'FA-H'}, {'frag_type': { 'LysoPE', 'LysoPEAlkyl', 'LysoPEAlkyl-H2O', 'FA-H2O-H' } } )
[docs]class PE_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a Phosphatidylethanolamine. **Specimen:** - in vivo BPI + 718.536 - Lyso-PE: in vitro FABP1 + 454.29 **Principle:** - The PE headgroup neutral loss 141.0191 has the highest intensity. - If it is a Lyso-PE score will be zero. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score = 20 if self.scn.has_fragment('NL PE [P+E] (NL 141.0191)'): if self.check_lyso(): return if self.scn.has_fragment('PE [P+E] (142.0264)'): self.score += 5 self.score += 15
[docs]class LysoPE_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a Lysophosphatidylethanolamine. **Specimen:** - in vitro FABP1 + 454.29 **Principle:** - The PE headgroup neutral loss 141.0191 has the highest intensity. - A fatty acid-glycerol fragment should match the carbon count and unsaturation of the whole molecule. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score = 15 if self.scn.has_fragment('NL PE [P+E] (NL 141.0191)'): self.score = 5 if self.scn.has_fragment('PE [P+E] (142.0264)'): self.score += 5 self.scn.build_chain_list() if ( len(self.scn.chain_list) and self.scn.chain_fragment_type_is( self.scn.chain_list[0].i, frag_type = 'FA+Glycerol-OH', c = self.rec.chainsum.c, u = self.rec.chainsum.u, ) ): self.score += 5
[docs]class PC_Negative(AbstractMS2Identifier): """ Examines if a negative mode MS2 spectrum is a Phosphatidylcholine. **Specimen:** - BPI - 804.57 and 776.545 **Principle:** - 168.0431 phosphate+choline-CH3 fragment must be present. - The highest abundant fragment must be a fatty acid [M-H]- fragment. - Lyso-PC fragments complementing the highest [M-H]- fatty acid increase the score. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score = 17 if ( self.scn.chain_fragment_type_is( i = 0, chain_type = 'FA', frag_type = 'FA-H' ) and self.scn.has_fragment('PC/SM PO4+choline-CH3 (168.0431)') ): self.score += 5 self.score += sum(map(bool, ( self.scn.has_fragment('PE [G+P+E-H2O] (196.0380)'), self.scn.has_fragment('PE [G+P+E] (178.0275)'), ))) * 3 self.matching_chain_combinations( {'frag_type': 'FA-H'}, {'frag_type': 'LysoPC'}, score_method = lambda ccomb: ((ccomb > 1) * 6, 6), )
[docs]class PC_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a Phosphatidylcholine. **Specimen:** - BPI + 786.607 **Principle:** - The most abundant fragment must be choline+phosphate 184.0733. - The 86.0964 ethyl-trimetylammonium must be present. - The most abundant fatty acid can not have the same carbon count and unsaturation as the whole molecule (then it is Lyso-PC). - Fragments 104.1069, 124.9998, 60.0808 and 58.0651 increase the score. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = False, **kwargs, )
[docs] def confirm_class(self): self.max_score = 13 if ( self.scn.fragment_percent_of_most_abundant( 'PC/SM [P+Ch] (184.0733)', 10.0 ) and self.scn.has_fragment('PC/SM [Ch] (86.096)') ): if self.check_lyso(): return self.score += 5 self.score += sum(map(bool, ( self.scn.has_fragment('PC/SM [Ch+H2O] (104.107)'), self.scn.has_fragment('PC/SM [P+Et] (124.9998)'), self.scn.has_fragment('PC/SM [N+3xCH3] (60.0808)'), self.scn.has_fragment('PC/SM [Ch-Et] (58.0651)'), ))) * 2
[docs]class LysoPC_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a Lysophosphatidylcholine. **Specimen:** - in vitro FABP1 + 522.36 **Principle:** - Choline-phosphate 184.0733, ethyl-trimethylammonium 86.0964 and neutral loss 183.0660 must be present. - The latter neutral loss corresponds to a fatty acid+glycerol ion. - The carbon count and unsaturation of this fragment should match that of the whole molecule. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = False, **kwargs, )
[docs] def confirm_class(self): self.max_score = 15 if ( self.scn.most_abundant_fragment_is('PC/SM [P+Ch] (184.0733)') and self.scn.has_fragment('PC/SM [Ch] (86.096)') ): self.score += 5 if self.scn.has_fragment('NL PC/SM [P+Ch] (NL 183.066)'): self.score += 5 if self.scn.has_chain_fragment_type( frag_type = {'FA+Glycerol-OH', 'NL FA-H2O'}, c = self.rec.chainsum.c, u = self.rec.chainsum.u, ): self.score += 5
[docs]class PI_Negative(AbstractMS2Identifier): """ Examines if a negative MS2 spectrum is Phosphatidylinositol. **Specimen:** - GM2A - 835.52 **Principle:** - Inositolphosphate-H2O fragment 241.0119, metaphosphate 78.9591 and headgroup fragment 152.9958 must be present. - Additional headgroup fragments 96.9696, 259.0224 and 297.0381 increase the score. - Presence of Lyso-PI fragments complementing other [M-H]- fatty acid fragments increase the score. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score = 19 if ( self.scn.has_fragment('PI [InsP-H2O]- (241.01)') and self.scn.has_fragment('PA/PG/PI/PS [G+P] (152.9958)') and self.scn.has_fragment('Cer1P/PIP/PL metaphosphate (78.9591)') ): self.score += 5 self.score += sum(map(bool, ( self.scn.has_fragment('Cer1P/PI phosphate (96.9696)'), self.scn.has_fragment('PI [InsP-H]- (259.02)'), self.scn.has_fragment('PI [G+P+I] (297.04)'), self.scn.has_fragment('PI [InsP-2H2O]- (223.00)'), ))) * 2 self.matching_chain_combinations( {'frag_type': 'FA-H'}, {'frag_type': { 'LysoPI', 'LysoPI-H2O', } }, score_method = lambda ccomb: (min(ccomb, 2) * 3, 6), )
[docs]class PI_Positive(AbstractMS2Identifier): """ Examines if a negative MS2 spectrum is Phosphatidylinositol. **Specimen:** - SEC14L2 + 906.60 and 882.6 **Principle:** - Combinations of fatty acid fragments must match the expected carbon count and unsaturation for PI. - Presence of neutral losses 259.0219 and 277.0563 adds to the score. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = True, **kwargs, )
[docs] def confirm_class(self): self.max_score = 9 if self.scn.has_chain_combinations(self.rec): self.score += 1 self.score += sum(map(bool, ( self.scn.has_fragment('NL PI [P+Ins] (NL 259.0219)'), self.scn.has_fragment('NL PI [P+Ins+NH3] (NL 277.0563)'), ))) * 4
[docs]class PS_Negative(AbstractMS2Identifier): """ Examines if a negative mode MS2 spectrum is a Phosphatidylserine. **Specimen:** - ORP9 - 788.54 **Principle:** - The most abundant fragment is an [M-H]- fatty acid fragment. - Glycerophosphate fragment 152.9958 must be present. - Metaphosphate 78.9591 increases the score. - Serine-H2O neutral loss 87.0320 adds to the score. - Presence of Lyso-PS and Lyso-PA fragments complementing the highest [M-H]- fatty acid fragment increase the score. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = True, **kwargs, )
[docs] def confirm_class(self): self.max_score = 17 if ( self.scn.has_chain_combinations(self.rec) and self.scn.chain_fragment_type_is( 0, chain_type = 'FA', frag_type = 'FA-H' ) and self.scn.fragment_among_most_abundant( 'PA/PG/PI/PS [G+P] (152.9958)', 5 ) ): self.score += 5 self.score += sum(map(bool, ( self.scn.has_fragment('Cer1P/PIP/PL metaphosphate (78.9591)'), self.scn.has_fragment('PS [Ser-H2O] (87.0320)'), ))) * 3 self.matching_chain_combinations( {'frag_type': 'FA-H'}, {'frag_type': {'LysoPS', 'LysoPA'}}, score_method = lambda ccomb: (min(ccomb, 2) * 3, 6), )
[docs]class PS_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a Phosphatidylserine. **Specimen:** - BPI + 790.56 **Principle:** - PS headgroup neutral loss 185.0089 must be the highest intensity. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = True, **kwargs, )
[docs] def confirm_class(self): self.max_score = 5 if self.scn.fragment_among_most_abundant('PS [P+S] (NL 185.0089)', 1): self.score += 5
[docs]class PG_Negative(AbstractMS2Identifier): """ Examines if a negative mode MS2 spectrum is Phosphatidylglycerol. The result will be the same as `bmp_neg_1`, as in negative mode we do not know a way to distinguish these species. **Specimen:** - GM2A - 799.54 - BPIFB2 - 773.5258 (might be BMP) **Principle:** - The most abundant fragment is a fatty acid [M-H]- ion. - The 152.9958 glycerophosphate fragment must be present. - If Lyso-PG fragment present with carbon count complementing the [M-H]- fatty acid score is higher. - Presence of 171.0064 headgroup fragment adds to the score. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = True, **kwargs, )
[docs] def confirm_class(self): self.max_score = 14 if ( self.scn.has_chain_combinations(self.rec) and self.scn.chain_fragment_type_is( 0, chain_type = 'FA', frag_type = 'FA-H' ) and self.scn.has_fragment('PA/PG/PI/PS [G+P] (152.9958)') ): self.score += 5 if self.scn.has_fragment('PG headgroup (171.0064)'): self.score += 3 self.matching_chain_combinations( {'frag_type': 'FA-H'}, {'frag_type': { 'LysoPG', 'LysoPG-H2O', } }, score_method = lambda ccomb: (min(ccomb, 2) * 3, 6), )
[docs]class PG_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a Phosphatidylglycerol. At in vivo observed only in standard. **Principle:** - The PG headgroup neutral loss (189.0402) is the fragment ion with the highest intensity? """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score = 5 if ( self.scn.most_abundant_fragment_is( 'NL PG [G+P+NH3] (NL 189.0402)' ) ): self.score += 5 # alternative for lyso if ( self.rec.hg.sub == ('Lyso',) and self.scn.chain_fragment_type_among_most_abundant( n = 1, frag_type = {'FA+Glycerol-OH'}, c = self.rec.chainsum.c, u = self.rec.chainsum.u, ) ): self.score += 5 self.max_score += 5
[docs]class BMP_Negative(PG_Negative): """ Examines if a negative mode MS2 spectrum is Bismonoacylglycerophosphate. The result will be the same as for PG, as in negative mode we do not know a way to distinguish these species. **Specimen:** - GM2A - 799.54 - BPIFB2 - 773.5258 (might be BMP) **Principle:** - The most abundant fragment is a fatty acid [M-H]- ion. - The 152.9958 glycerophosphate fragment must be present. - If Lyso-PG fragment present with carbon count complementing the [M-H]- fatty acid score is higher. - Presence of 171.0064 headgroup fragment adds to the score. """ def __init__(self, record, scan, **kwargs): PG_Negative.__init__(self, record, scan, **kwargs)
[docs]class BMP_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a Bismonoacylglycerophosphate. **Specimen:** - BPIFB2 + 792.57 **Principle:** - A glycerol+fatty acid fragment can be found among the 3 highest? - The PG headgroup neutral loss (189.0402) is among the fragments? - If so, does it have a lower intensity than half of the fatty acid+glycerol fragment? """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = True, **kwargs, )
[docs] def confirm_class(self): self.max_score = 10 if ( self.scn.has_chain_combinations(self.rec, head = 15) and self.scn.chain_fragment_type_among_most_abundant( chain_type = 'FA', frag_type = 'FA+Glycerol-OH', n = 3 ) ): self.score += 5 i_hg = self.scn.fragment_by_name('NL PG [G+P+NH3] (NL 189.0402)') if i_hg is not None: i_gfa = self.scn.highest_fragment_by_chain_type( head = 4, frag_type = 'FA+Glycerol-OH' ) if self.scn.intensities[i_gfa] < self.scn.intensities[i_hg]: self.score = 0 else: self.score += 5
[docs]class PA_Negative(AbstractMS2Identifier): """ Examines if a negative mode MS2 spectrum is Phosphatidylglycerol. The result will be the same as `bmp_neg_1`, as in negative mode we do not know a way to distinguish these species. **Specimen:** - GM2A - 799.54 - BPIFB2 - 773.5258 (might be BMP) **Principle:** - The most abundant fragment is a fatty acid [M-H]- ion. - The 152.9958 glycerophosphate fragment must be present. - If Lyso-PG fragment present with carbon count complementing the [M-H]- fatty acid score is higher. - Presence of 171.0064 headgroup fragment adds to the score. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = True, **kwargs, )
[docs] def confirm_class(self): self.max_score = 25 if ( self.scn.has_chain_combinations(self.rec) and self.scn.chain_fragment_type_is( 0, chain_type = 'FA', frag_type = 'FA-H' ) and self.scn.fragment_among_most_abundant( 'PA/PG/PI/PS [G+P] (152.9958)', 10 ) and self.scn.fragment_among_most_abundant( 'Cer1P/PIP/PL metaphosphate (78.9591)', 10 ) ): self.score += 20 if self.scn.has_fragment('Cer1P/PI phosphate (96.9696)'): self.score += 5
[docs]class PA_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a Phosphatidylglycerol. At in vivo observed only in standard. **Principle:** - The PG headgroup neutral loss (189.0402) is the fragment ion with the highest intensity? """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, **kwargs, )
[docs] def confirm_class(self): self.max_score = 20 if ( self.scn.fragment_among_most_abundant( 'NL [P] (NL 97.9769)', 3, adduct = self.add ) ): self.score += 10 if self.scn.has_chain_combination( self.rec, chain_param = ({ 'frag_type': { 'FA+Glycerol-OH', 'FA-OH', 'FA-H2O-OH', } },) ): self.score += 10
# # Vitamins #
[docs]class VA_Positive(AbstractMS2Identifier): """ Examines if a positive MS2 spectrum is vitamin A (retinol). **Specimen:** - in vivo RBP1 + 269.2245 - in vivo RBP4 + 269.2245 **Principle:** - The most abundant ion is the whole molecule m/z = 269.224. - Presence off 3 other ions adds to the score but not mandatory: 213.165, 145.1027, 157.1028. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = False, **kwargs, )
[docs] def confirm_class(self): self.max_score = 8 if self.scn.fragment_among_most_abundant('Retinol I (269.2264)', 3): self.score += 5 self.score += sum(map(bool, ( self.scn.has_fragment('Retinol II (213.1637)'), self.scn.has_fragment('Retinol III (157.1012)'), self.scn.has_fragment('Retinol IV (145.1012)'), )))
[docs]class VA_Negative(AbstractMS2Identifier): """ Examines if a positive MS2 spectrum is vitamin A (retinol). **Specimen:** - Standards 141020 negative scan 673 **Principle:** - 3 fragments seems to always present and be among the most abundant: 79.055, 119.087 and 255.212; presence of these is the main condition. - We also detected 125.061 in our standards which is special because contains 2 oxygens; presence of this increase the score. """ def __init__(self, record, scan, **kwargs): AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = False, **kwargs, )
[docs] def confirm_class(self): self.max_score = 8 if all(( self.scn.fragment_among_most_abundant(fragname, 7) for fragname in ( 'Retinoic acid I (79.0553)', 'Retinoic acid II (119.0866)', 'Retinoic acid IV (255.2118)', ) )): self.score += 5 if self.scn.has_fragment('Retinoic acid III (125.0608)'): self.score += 3
# # Sphingolipids #
[docs]class Cer_Positive(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a ceramide. Identifies ceramide varieties including sphingomyeline, ceramide-1-phosphare, ceramide-phosphoethanolamine, OH-acyl-ceramide, hexosyl and dihexosyl-ceramides, and d, t and DH long chain base varieties. dCer ==== **Specimen:** - Sphingolipid standards scans - SEC14L1 + 538.52 - STARD11 + 538.526 **Principle:** - A sphingosine backbone with two H2O loss must be among the 10 most abundant fragments. - Fatty acid [M+H]+ or [M-O]+ fragments or neutral losses complementing the one above increase the score. - Sphingosine backbone fragments with same carbon count and unsaturation with the one with 2 water loss but [Sph-C-2(H2O)]+ or [Sph-H2O]+ add to the score. - The score increases if the following choline fragments can not be found: 58.0651, 104.1070, 124.9998 and 184.0733. - The presence of the following fragments increase the score: 60.0444, 70.0651, 82.0651, 96.0808, 107.0730, 121.0886, 135.1042 and 149.1199. DHCer ===== **Specimen:** - Standards 180626 m/z 568.56 scan 2367 **Principle:** - Same pattern as at dCer but from the sphingosine derived fragments it becomes clear if it has no unsaturation. tCer ==== **Specimen:** - Standards 180628 m/z 584.56 scan 2070 **Principle:** - Has Sph+H2O-H fragment which does not occur at dCer. - Sph-H2O-H and Sph-H are much higher abundant than at dCer. - Strong fragment at 60.044 which is missing at dCer. - H2O and 2xH2O neutral losses are much higher than at dCer. dCer-2OH-acyl ============= **Specimen:** - Standards 180615 m/z 584.22 scan 2421 **Principle:** - Same as other ceramides. It's d and DH forms are isobaric with tCer but d and t are clearly distinguishable so this does not cause confusion. dCer-1-P ======== **Specimen:** - in vivo GLTPD1 + 728.59 **Principle:** - A shpingosine backbone with 2 H2O loss must be among the 3 highest intensity fragments. - Presence of any of the following fragments increases the score: 82.0651, 115.9875. 107.0729, 135.1043, 149.1199. Hex-dCer ======== **Specimen:** - in vivo GLTP + 810.68 **Principle:** - Hexose fragments 198.0740, 180.0634 and 162.0528 must present. These are neutral losses of hexose, hexose-H2O and hexose+H2O Hex-tCer ======== **Specimen:** - in vivo GLTP + 826.67 - in vitro GLTP + 826.67, 800.66, **Principle:** - Hexose fragments 198.0740, 180.0634 and 162.0528 must present. These are neutral losses of hexose, hexose-H2O and hexose+H2O Hex2-dCer ========= **Specimen:** - in vivo GLTP + 988.73 **Principle:** - Loss of double hexose with our without extra water or water loss are the characteristic fragments of this class. SHex-dCer ========= **Specimen:** - in vitro 890.64 dSM & DHSM ========== **Specimen:** - in vivo GLTPD1 + 703.57 - in vitro GLTPD1 + 813.68 **Principle:** - The following choline fragments must be present: 60.0808, 86.0964, 104.1069, 124.9998 and 184.0733. The last one is the most intensive. - If 58.0651 can be found it adds to the score. - dSM and DHSM are not distinguishable in our settings. Maybe the [Sph-2xH2O+H]+ ion (264 @ 18:1) presents more often at d and only eventually at DH. PE-Cer ====== We do not have this in standards or in screens so we can not test this. Based on Amiar 2016 and Narayanaswamy 2014. **Principle:** - Neutral loss of 141.0191 must be present. - 142.0264 phospho-ethanolamine fragment and neutral loss of phospho-ethanolamine+water might be present. - Sph-2xH2O fragment increases the score. """ class_methods = { 'SM': 'sm', 'Sph': 'sph', } subclass_methods = { '1P': 'cer1p', 'Hex': 'hexcer', 'Hex2': 'hex2cer', 'SHex': 'shexcer', 'SHex2': 'shex2cer', 'PE': 'pe_cer', 'M2': 'm2', 'M1': 'm1', 'M3': 'm3', 'PC': 'pc', 'empty': 'cer', } def __init__(self, record, scan, **kwargs): self.nacyl = record.chainsum is not None and len(record.chainsum) > 1 self.oacyl = record.chainsum is not None and len(record.chainsum) > 2 AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (1,) if self.nacyl else (), chain_comb_args = {}, must_have_chains = True, **kwargs, ) self.sph_scores = {} self.sph_max_scores = {} self.fa_scores = {} self.fa_max_scores = {}
[docs] def confirm_class(self): AbstractMS2Identifier.confirm_class(self) self.max_score += 2 self.score += sum(map( self.scn.has_fragment, ( 'NL [H2O] (NL 18.0106)', 'NL [2xH2O] (NL 36.0211)', ) )) self.confirm_subclass()
[docs] def confirm_chains_explicit(self): """ Most of the time we don't really have fatty acid derived fragments from ceramides in positive mode. However certain sphingosin base derived fragments correspond to neutral losses of fatty acids as the molecule is composed of a sphingosine and a fatty acid but this is redundant. Hence we call both explicit and implicit identification as practically the it is implicit anyways. """ for chains in itertools.chain( AbstractMS2Identifier.confirm_chains_explicit(self), AbstractMS2Identifier.confirm_chains_implicit(self), ): if chains[0][0].attr.sph == self.rec.chainsum.attr[0].sph: # the sphingosin base and fatty acyl related part of the # score is valid only for the current chain combination # hence now we add these to the overall score, yield the # identification and then subtract them from the score sph_score, sph_max_score = self.sphingosine_base( chains[0][0].attr.sph ) self.score += sph_score self.max_score += sph_max_score if self.nacyl: fa_score, fa_max_score = self.fatty_acyl(chains[0][1]) self.score += fa_score self.max_score += fa_max_score yield chains self.score -= sph_score self.max_score -= sph_max_score if self.nacyl: self.score -= fa_score self.max_score -= fa_max_score
[docs] def fatty_acyl(self, fa): score = 0 max_score = 0 if fa.attr.oh: if self.add == '[M-H2O+H]+': score -= 20 return score, max_score
[docs] def cer(self): score = 0 max_score = 0 non_hex_score, non_hex_max_score = self.non_hex() score += non_hex_score max_score += non_hex_max_score return score, max_score
[docs] def non_hex(self): score = 0 max_score = 0 score -= sum(map(bool, ( self.scn.has_fragment('NL [Hexose-H2O] (NL 162.05)'), self.scn.has_fragment('NL [Hexose] (NL 180.06)'), self.scn.has_fragment('NL [Hexose+H2O] (NL 198.07)'), self.scn.has_fragment('NL [2xHexose] (NL 342.1162)'), self.scn.has_fragment('NL [2xHexose+H2O] (NL 360.1268)'), self.scn.has_fragment('NL [2xHexose-H2O] (NL 324.1056)'), self.scn.has_fragment('NL [2xHexose+O] (NL 358.1111)'), self.scn.has_fragment('NL [2xHexose+C] (NL 372.1268)'), self.scn.has_fragment('NL [S] (NL 79.9568)'), self.scn.has_fragment('NL [S+H2O] (97.9674)'), self.scn.has_fragment('NL [Hexose+SO3] (NL 242.100)'), self.scn.has_fragment('NL [Hexose+SO3+H2O] (NL 260.0202)'), self.scn.has_fragment('NL [Hexose+SO3+2xH2O] (NL 278.0308)'), self.scn.has_fragment('NL [2xHexose+SO3] (NL 404.0625)'), self.scn.has_fragment('NL [2xHexose+SO3+H2O] (NL 422.0730)'), self.scn.has_fragment('NL [2xHexose+SO3+2xH2O] (NL 440.0836)'), ) )) * 5 return score, max_score
[docs] def sm(self): score = 0 max_score = 47 if self.scn.most_abundant_fragment_is('PC/SM [P+Ch] (184.0733)'): score += 15 score += sum(map(bool, ( self.scn.has_fragment('PC/SM [N+3xCH3] (60.0808)'), self.scn.has_fragment('PC/SM [Ch] (86.096)'), self.scn.has_fragment('PC/SM [Ch+H2O] (104.107)'), self.scn.has_fragment('PC/SM [P+Et] (124.9998)'), self.scn.has_fragment('PC/SM [Ch-Et] (58.0651)'), self.scn.has_fragment('NL PC/SM [P+Ch] (NL 183.066)'), self.scn.has_fragment('NL SM [P+Ch] (NL 201.0766)'), self.scn.has_fragment('NL SM [N+3xCH3] (77.0841)'), self.scn.has_fragment('NL [H2O] (NL 18.0106)'), ) )) * 3 if self.scn.has_chain_fragment_type(frag_type = 'Sph-2xH2O+H'): score += 5 self.must_have_chains = False return score, max_score
[docs] def pc(self): """ Lyso-SM aka Sph-PC. Scherer 2010, Table 1. """ score = 0 max_score = 15 if self.scn.most_abundant_fragment_is('PC/SM [P+Ch] (184.0733)'): score += 15 return score, max_score
[docs] def pe_cer(self): score = 0 max_score = 30 if self.scn.has_fragment('NL PE [P+E] (NL 141.0191)'): score += 15 score += sum(map(bool, ( self.scn.has_fragment('PE [P+E] (142.0264)'), self.scn.has_fragment('NL PE [P+E+H2O] (NL 159.0297)'), self.scn.has_fragment('NL PE [P+E-H2O] (NL 123.0085)'), ) )) * 5 return score, max_score
[docs] def cer1p(self): score = 0 max_score = 31 if self.scn.has_fragment('NL [P+H2O] (NL 115.9875)'): score += 10 if self.scn.chain_among_most_abundant(3, frag_type = 'Sph-2xH2O-H'): score += 10 if self.scn.has_chain_combination( self.rec, chain_param = ( {'frag_type': { 'Sph-2xH2O+H', 'Sph-H2O+H', 'Sph-H2O-H' } }, {'frag_type': 'FA+NH+C2H2-OH'}, ) ): score += 5 score += sum(map(bool, ( self.scn.has_fragment('NL [P] (NL 79.9663)'), self.scn.has_fragment('NL [P] (NL 97.9769)'), ) )) * 3 non_hex_score, non_hex_max_score = self.non_hex() score += non_hex_score return score, max_score
[docs] def hexcer(self): score = 0 max_score = 14 score += sum(map(bool, ( self.scn.has_fragment('NL [Hexose-H2O] (NL 162.05)'), self.scn.has_fragment('NL [Hexose] (NL 180.06)'), self.scn.has_fragment('NL [Hexose+H2O] (NL 198.07)'), ) )) * 3 if self.hexcer_chain_combination(): score += 5 return score, max_score
[docs] def hex2cer(self): score = 0 max_score = 39 score += sum(map(bool, ( self.scn.has_fragment('NL [2xHexose] (NL 342.1162)'), self.scn.has_fragment('NL [2xHexose+H2O] (NL 360.1268)'), ) )) * 10 score += sum(map(bool, ( self.scn.has_fragment('NL [2xHexose-H2O] (NL 324.1056)'), self.scn.has_fragment('NL [2xHexose+O] (NL 358.1111)'), self.scn.has_fragment('NL [2xHexose+C] (NL 372.1268)'), ) )) * 3 if self.hexcer_chain_combination(): score += 10 return score, max_score
[docs] def hexcer_chain_combination(self): return self.scn.has_chain_combination( self.rec, chain_param = ( {'frag_type': { 'Sph-2xH2O+H', 'Sph-2xH2O-H', 'Sph-H2O+H', 'Sph-H2O-H', 'Sph-C-2xH2O', } }, {'frag_type': { 'FA-OH', 'NL FA', 'FA+NH2-O', } }, ) )
[docs] def shexcer(self): score = 0 max_score = 25 score += sum(map(bool, ( self.scn.has_fragment('NL [S] (NL 79.9568)'), self.scn.has_fragment('NL [S+H2O] (97.9674)'), self.scn.has_fragment('NL [Hexose+SO3] (NL 242.100)'), self.scn.has_fragment('NL [Hexose+SO3+H2O] (NL 260.0202)'), self.scn.has_fragment('NL [Hexose+SO3+2xH2O] (NL 278.0308)'), ) )) * 5 return score, max_score
[docs] def shex2cer(self): score = 0 max_score = 25 score += sum(map(bool, ( self.scn.has_fragment('NL [S] (NL 79.9568)'), self.scn.has_fragment('NL [S+H2O] (97.9674)'), self.scn.has_fragment('NL [2xHexose+SO3] (NL 404.0625)'), self.scn.has_fragment('NL [2xHexose+SO3+H2O] (NL 422.0730)'), self.scn.has_fragment('NL [2xHexose+SO3+2xH2O] (NL 440.0836)'), ) )) * 5 return score, max_score
[docs] def m2(self): score = 0 max_score = 46 if self.scn.has_fragment('PC/SM [Ch-Et] (58.0651)'): score += 10 if ( self.rec.chainsum.u > 0 and self.scn.has_fragment('[C7+NH2] (110.0964)') ): score += 10 score += sum(map(bool, ( self.scn.has_fragment('[C5+NH2+2H] (84.0808)'), self.scn.has_fragment('[C6+NH2] (96.0808)'), ))) * 3 if self.scn.has_chain_combination( self.rec, chain_param = ( { 'frag_type': { 'Sph-2xH2O+CH3', 'Sph-O-H2O+CH3+H', 'Sph-2xH2O+2xCh3+H', 'Sph-H2O+2xCH3+H', } }, ) ): score += 20 return score, max_score
[docs] def m1(self): score = 0 max_score = 30 if self.scn.has_fragment('PC/SM [Ch-Et] (58.0651)'): score += 10 if self.scn.has_chain_combination( self.rec, head = 10, chain_param = ( { 'frag_type': { 'Sph-2xH2O+CH3', 'Sph-O-H2O+CH3+H', 'Sph-H2O+CH3+H', } }, ) ): score += 20 if self.scn.has_chain_fragment_type( frag_type = {'Sph-2xH2O+2xCH3+H', 'Sph-H2O+2xCH3+H'}, c = self.rec.chainsum.c - 1, u = self.rec.chainsum.u, ): score -= 20 return score, max_score
[docs] def m3(self): score = 0 max_score = 20 if self.scn.fragment_among_most_abundant( 3, 'PC/SM [N+3xCH3] (60.0808)' ): score += 20 return score, max_score
[docs] def sph(self): score = 0 max_score = 9 score += sum(map(bool, ( self.scn.has_fragment('[C3+NH2] (56.0495)'), self.scn.has_fragment('[C2+NH2+O] (60.0444)'), self.scn.has_fragment('[C4+NH2+OH] (86.0600)'), ) )) * 3 return score, max_score
[docs] def sphingosine_base(self, sph): if sph not in self.sph_scores: method = 'sphingosine_%s' % sph.lower() self.sph_scores[sph], self.sph_max_scores[sph] = ( getattr(self, method)() if hasattr(self, method) else (0, 0) ) return self.sph_scores[sph], self.sph_max_scores[sph]
[docs] def sphingosine_d(self): score = 0 max_score = 20 if self.rec.chainsum and self.rec.chainsum.u == 0: return score, max_score if ( self.nacyl and self.scn.chain_fragment_type_is( 0, frag_type = 'Sph-2xH2O+H', u = (False, {0}), ) ) or ( not self.nacyl and self.scn.chain_fragment_type_is( 0, frag_type = 'Sph-H2O+H', u = (False, {0}), ) ) or ( self.rec.hg.main == 'SM' and self.scn.chain_among_most_abundant( 5, frag_type = 'Sph-2xH2O+H', u = (False, {0}), skip_non_chains = True, ) ): score += 6 score += sum(map(bool, ( not self.scn.has_fragment('[C2+NH2+O] (60.0444)'), self.scn.has_fragment('NL [C+2xH2O] (NL 48.0211)') ) )) * 2 if ( self.scn.chain_fragment_type_among_most_abundant( 4, frag_type = 'Sph-H2O+H', u = (False, {0}) ) and self.scn.chain_fragment_type_among_most_abundant( 4, frag_type = 'Sph-C-O-H2O-H', u = (False, {0}) ) and self.scn.chain_fragment_type_among_most_abundant( 4, frag_type = 'Sph-2xH2O+H', u = (False, {0}) ) ): score += 10 return score, max_score
[docs] def sphingosine_dh(self): score = 0 max_score = 20 score += sum(map(bool, ( self.scn.has_fragment('[C2+NH2+O] (60.0444)'), not self.scn.has_fragment('NL [C+2xH2O] (NL 48.0211)') ) )) score += sum(map(bool, ( self.scn.chain_fragment_type_among_most_abundant( 5, frag_type = 'Sph-H2O+H', u = 0 ), self.scn.chain_fragment_type_among_most_abundant( 5, frag_type = 'FA+NH2-O', u = 0 ), self.scn.chain_fragment_type_among_most_abundant( 10, frag_type = 'Sph-2xH2O+H', u = 0 ), self.scn.has_chain_fragment_type( frag_type = 'Sph-C-O-H2O-H', u = 0 ), self.scn.has_chain_fragment_type( frag_type = 'Sph+H', u = 0 ), self.scn.has_chain_fragment_type( frag_type = 'Sph-C-O-H2O-NH', u = 0 ) ) )) * 3 return score, max_score
[docs] def sphingosine_t(self): score = 0 max_score = 20 if all(( self.scn.chain_fragment_type_among_most_abundant( 5, frag_type = 'Sph-H2O-H', ), self.scn.chain_fragment_type_among_most_abundant( 10, frag_type = 'Sph-2xH2O-H', ), ( self.scn.fragment_among_most_abundant( '[C2+NH2+O] (60.0444)' ) or self.rec.hg.sub ) )): score = 9 score += sum(map(bool, ( not self.scn.has_fragment('NL [C+2xH2O] (NL 48.0211)'), self.scn.has_fragment('NL [3xH2O] (NL 54.0317)') ) )) score += sum(map(bool, ( self.scn.has_chain_fragment_type( frag_type = 'Sph-C-2xH2O', ), self.scn.has_chain_fragment_type( frag_type = 'Sph+H2O-H', ), self.scn.chain_fragment_type_among_most_abundant( 5, frag_type = 'Sph-H', ), ) )) * 3 return score, max_score
[docs] def sphingosine_k(self): score = 0 max_score = 39 if self.scn.has_chain_fragment_type(frag_type = 'Sph-NH2-H2O-2H'): score += 15 score += sum(map(bool, ( self.scn.has_fragment('[C2+NH2+O] (60.0444)'), self.scn.has_fragment('[C4+NH2+OH] (86.0600)'), self.scn.has_fragment('[C6+OH] (99.0804)'), self.scn.has_fragment('[C3+NH2] (56.0495)'), ) )) * 3 score += sum(map(bool, ( self.scn.has_chain_fragment_type( frag_type = 'Sph-C-2xH2O', ), self.scn.has_chain_fragment_type( frag_type = 'Sph-H2O-H', ), self.scn.has_chain_fragment_type( frag_type = 'Sph-H', ), self.scn.chain_fragment_type_among_most_abundant( 5, frag_type = 'Sph-H', ), ) )) * 3 return score, max_score
[docs]class Cer_Negative(AbstractMS2Identifier): """ Examines if a positive mode MS2 spectrum is a ceramide. Identifies ceramide varieties including sphingomyeline, ceramide-1-phosphare, ceramide-phosphoethanolamine, OH-acyl-ceramide, hexosyl and dihexosyl-ceramides, and d, t and DH long chain base varieties. dCer ==== **Specimen:** - in vivo SEC14L1 583, 554, 580 (formiate adduct) - in vivo STARD11 583, 554 (formiate adduct) - standards DHCer ===== **Specimen:** - standards tCer ==== **Specimen:** - standards """ class_methods = { 'Cer': 'cer', 'SM': 'sm', } subclass_methods = { '1P': 'cer1p', 'Hex': 'hexcer', } def __init__(self, record, scan, **kwargs): self.nacyl = record.chainsum is not None and len(record.chainsum) > 1 self.oacyl = record.chainsum is not None and len(record.chainsum) > 2 AbstractMS2Identifier.__init__( self, record, scan, missing_chains = (), chain_comb_args = {}, must_have_chains = True, **kwargs, ) self.sph_scores = {} self.fa_scores = {} self.sph_max_scores = {} self.fa_max_scores = {}
[docs] def confirm_class(self): AbstractMS2Identifier.confirm_class(self) self.confirm_subclass()
[docs] def confirm_chains_explicit(self): for chains in itertools.chain( AbstractMS2Identifier.confirm_chains_explicit(self), AbstractMS2Identifier.confirm_chains_implicit(self), ): if chains[0][0].attr.sph == self.rec.chainsum.attr[0].sph: # the sphingosin base and fatty acyl related part of the # score is valid only for the current chain combination # hence now we add these to the overall score, yield the # identification and then subtract them from the score sph_score, sph_max_score = self.sphingosine_base( chains[0][0].attr.sph ) self.score += sph_score self.max_score += sph_max_score if self.nacyl: fa_score, fa_max_score = self.fatty_acyl(chains[0][1]) self.score += fa_score self.max_score += fa_max_score yield chains self.score -= sph_score self.max_score -= sph_max_score if self.nacyl: self.score -= fa_score self.max_score -= fa_max_score
[docs] def fatty_acyl(self, fa): score = 0 max_score = 0 if len(fa.attr.oh) == 1: max_score = 30 if self.scn.has_chain_combination( self.rec, head = 20, # to exclude tCer chain_param = ( { 'frag_type': { 'Sph-H', # b1 'Sph-C2H4-NH2-H2O', # b5 } }, { 'frag_type': { 'FA+C2+NH2+O', # a5 @ hydroxyacyl 'FA+CH2+NH2+O', # a1 @ hydroxyacyl } } ) ): score += 30 return score, max_score
[docs] def cer(self): max_score = 23 cer_nl = ( 'NL H2O (NL 18.0106)', # Hsu c1 'NL 2xH2O (NL 36.0211)', # Hsu c4 'NL C+H2O (NL 30.0106)', # Hsu c2 'NL CH2+H2O (NL 32.0262)', # Hsu c3 'NL C+2xH2O (NL 48.0211)', # Hsu c5 'NL C+3xH2O (66.0455)', # Hsu c6 ) score = sum( self.scn.has_fragment(frag_name, adduct = self.add) for frag_name in cer_nl ) * 3 if self.scn.has_chain_combinations(self.rec, adduct = self.add): score += 5 return score, max_score
[docs] def sphingosine_base(self, sph): if sph not in self.sph_scores: method = 'sphingosine_%s' % sph.lower() self.sph_scores[sph], self.sph_max_scores[sph] = ( getattr(self, method)() if hasattr(self, method) else (0, 0) ) return self.sph_scores[sph], self.sph_max_scores[sph]
[docs] def sphingosine_d_dh(self): score = 0 max_score = 20 if self.scn.has_chain_combination( self.rec, chain_param = ( { 'frag_type': { 'Sph-H', # b1 'Sph-C2H4-3H', # b2 'Sph-CH2-H2O-H', # b3 'Sph-H2O-NH2-2H', # b4 'Sph-C2H4-NH2-H2O', # b5 } }, { 'frag_type': { 'FA+C2+NH2', # a2 'FA+C2+NH2-O', # a3 } } ) ): score += 20 return score, max_score
[docs] def sphingosine_d(self): score = 0 max_score = 20 d_dh_score, d_dh_max_score = self.sphingosine_d_dh() score += d_dh_score max_score += d_dh_max_score if self.scn.has_fragment('NL C+H2O (NL 30.0106)', adduct = self.add): score += 20 return score, max_score
[docs] def sphingosine_dh(self): score = 0 max_score = -20 d_dh_score, d_dh_max_score = self.sphingosine_d_dh() score += d_dh_score max_score += d_dh_max_score if self.scn.has_fragment('NL C+H2O (NL 30.0106)', adduct = self.add): score -= 20 return score, max_score
[docs] def sphingosine_t(self): score = 0 max_score = 28 if self.scn.has_fragment('NL C+3xH2O (66.0455)', adduct = self.add): score += 5 if self.scn.has_fragment('HexCer identity II'): score += 3 if self.scn.has_chain_combination( self.rec, chain_param = ( { 'frag_type': { 'Sph-CH2-NH2-4H', # b6 } }, { 'frag_type': { 'FA+C2H2+NH2', # a1 'FA+C3H2+NH2', # a10 } } ) ): score += 20 # differentiate from hydroxyacyl-dCer if self.scn.chain_percent_of_most_abundant( frag_type = {'FA+C2H2+NH2+O', 'FA+C2+NH2+O'}, percent = 5.0, ): score -= 10 return score, max_score
[docs] def cer1p(self): score = 0 max_score = 70 if any(map(bool, ( self.scn.has_fragment('Cer1P/PIP/PL metaphosphate (78.9591)'), self.scn.has_fragment('Cer1P/PI phosphate (96.9696)'), ))): score += 20 if self.scn.has_fragment( 'NL H2O (NL 18.0106)', adduct = self.add ): score += 10 if self.scn.has_chain_fragment_type( frag_type = {'NLFA_pH2O', 'NLFA_p2xH2O'}, adduct = self.add ): score += 10 self.must_have_chains = False if ( self.rec.hg.main == 'Sph' and self.scn.fragment_among_most_abundant( 'Cer1P/PIP/PL metaphosphate (78.9591)', 3 ) ): score += 20 self.must_have_chains = False if self.scn.has_fragment('Cer1P/PI phosphate (96.9696)'): score += 10 return score, max_score
[docs] def sm(self): score = 0 max_score = 45 if self.scn.fragment_among_most_abundant( 'NL CH2 (NL 14.0157)', 3, adduct = self.add ) and self.scn.fragment_among_most_abundant( 'PC/SM PO4+choline-CH3 (168.0431)', 5 ): score += 30 score += sum(map(bool, ( self.scn.has_fragment( 'Cer1P/PIP/PL metaphosphate (78.9591)' ), self.scn.has_fragment( 'NL choline+H2O', adduct = self.add ), self.scn.has_fragment( 'NL choline+H2O-CH3', adduct = self.add ), ))) * 5 self.must_have_chains = False return score, max_score
[docs] def hexcer(self): score = 0 max_score = 90 self.score += sum(map(bool, ( self.scn.fragment_among_most_abundant('HexCer identity I', 10), self.scn.fragment_among_most_abundant('HexCer identity II', 10), self.scn.fragment_among_most_abundant('HexCer identity III', 10), self.scn.has_fragment('[Hexose] (179.0561)'), self.scn.has_fragment('[Hexose-H2O] (161.0455)'), self.scn.has_fragment('[Hexose-HCHO] (149.0455)'), self.scn.has_fragment('NL hexose (162.053)'), self.scn.has_fragment('NL hexose+H2O (180.063)'), ))) * 10 if self.scn.has_chain_combinations(self.rec): score += 10 return score, max_score
[docs] def hex2cer(self): score = 0 max_score = 140 self.score += sum(map(bool, ( self.scn.has_fragment('HexCer identity I'), self.scn.has_fragment('HexCer identity II'), self.scn.has_fragment('HexCer identity III'), self.scn.has_fragment('[Hexose] (179.0561)'), self.scn.has_fragment('[Hexose-H2O] (161.0455)'), self.scn.has_fragment('[Hexose-HCHO] (149.0455)'), self.scn.has_fragment('NL hexose (162.053)'), self.scn.has_fragment('NL hexose+H2O (180.063)'), self.scn.has_fragment('[2xHexose-HCHO] (311.0984)'), self.scn.has_fragment('[2xHexose-H2O] (323.0984)'), self.scn.has_fragment('[2xHexose] (341.1089)'), self.scn.has_fragment('NL 2xHexose (324.106)'), self.scn.has_fragment('NL 2xHexose+H2O (342.1162)'), ))) * 10 if self.scn.has_chain_combinations(self.rec): score += 10 return score, max_score
[docs] def shexcer(self): score = 0 max_score = 100 self.missing_chains = (1,) if self.scn.has_fragment('Sulphate (96.9601)'): score += 20 self.score += sum(map(bool, ( self.scn.has_fragment('[Sulfohexose] (259.0129)'), self.scn.has_fragment('[Sulfohexose] (256.9972)'), self.scn.has_fragment('[Sulfohexose-H2O] (241.0024)'), self.scn.has_fragment('[Sulfohexose+Et+N] (300.0395)'), ))) * 10 if self.scn.has_chain_fragment_type( frag_type = { 'Sph+C6O5H8+SO3+H2O', 'Sph+C6O5H8+SO3+CO+H2O', } ): score += 20 if self.scn.has_chain_combination( self.rec, chain_param = ( { 'frag_type': { 'Sph+C6O5H8+SO3', 'Sph+C6O5H8+SO3+H2O', 'Sph+C6O5H8+SO3+CO+H2O', } }, { 'frag_type': { 'NLFA', 'NLFA_mH2O', } } ) ): score += 20 return score, max_score
[docs] def shex2cer(self): score = 0 max_score = 140 self.missing_chains = (1,) if self.scn.has_fragment('Sulphate (96.9601)'): score += 20 score += sum(map(bool, ( self.scn.has_fragment('[Sulfohexose] (259.0129)'), self.scn.has_fragment('[Sulfohexose] (256.9972)'), self.scn.has_fragment('[Sulfohexose-H2O] (241.0024)'), self.scn.has_fragment('[Sulfohexose+Et+N] (300.0395)'), self.scn.has_fragment('[2xHexose-H2O+SO3] (403.0552)'), self.scn.has_fragment('[2xHexose+SO3] (419.0501)'), self.scn.has_fragment('[2xHexose+SO3] (421.0658)'), self.scn.has_fragment('[2xHexose+SO3+Et+N] (462.0923)'), ))) * 10 if self.scn.has_chain_fragment_type( frag_type = { 'Sph+C12O10H18+SO3', 'Sph+C12O10H18+SO3+H2O', 'Sph+C12O10H18+SO3+CO+H2O', } ): score += 20 if self.scn.has_chain_combination( self.rec, chain_param = ( { 'frag_type': { 'Sph+C12O10H18+SO3', 'Sph+C12O10H18+SO3+H2O', 'Sph+C12O10H18+SO3+CO+H2O', } }, { 'frag_type': { 'NLFA', 'NLFA_mH2O', } } ) ): score += 20 return score, max_score
[docs] def pe_cer(self): score = 0 max_score = 30 score += sum(map(bool, ( self.scn.has_fragment('PE [P+E] (140.0118)'), self.scn.has_fragment('NL PE [P+E] (141.0191)'), self.scn.has_fragment('PE [P+E-H2O] (122.0013)'), ))) * 10 return score, max_score
# # Scan.identify() dispatches identification methods as below # idmethods = { 'neg': { lipproc.Headgroup(main = 'FA'): FA_Negative, lipproc.Headgroup(main = 'DAG'): DAG_Negative, lipproc.Headgroup(main = 'TAG'): TAG_Negative, lipproc.Headgroup(main = 'DGTA'): GL_Negative, lipproc.Headgroup(main = 'DGTS'): GL_Negative, lipproc.Headgroup(main = 'DGCC'): GL_Negative, lipproc.Headgroup(main = 'SQDG'): GL_Negative, lipproc.Headgroup(main = 'MGDG'): GL_Negative, lipproc.Headgroup(main = 'DGDG'): GL_Negative, lipproc.Headgroup(main = 'DGTA', sub = ('Lyso',)): GL_Negative, lipproc.Headgroup(main = 'DGTS', sub = ('Lyso',)): GL_Negative, lipproc.Headgroup(main = 'DGCC', sub = ('Lyso',)): GL_Negative, lipproc.Headgroup(main = 'SQDG', sub = ('Lyso',)): GL_Negative, lipproc.Headgroup(main = 'MGDG', sub = ('Lyso',)): GL_Negative, lipproc.Headgroup(main = 'DGDG', sub = ('Lyso',)): GL_Negative, lipproc.Headgroup(main = 'PE'): PE_Negative, lipproc.Headgroup(main = 'PE', sub = ('Lyso',)): PE_Negative, lipproc.Headgroup(main = 'PC'): PC_Negative, lipproc.Headgroup(main = 'PC', sub = ('Lyso',)): PC_Negative, lipproc.Headgroup(main = 'PI'): PI_Negative, lipproc.Headgroup(main = 'PI', sub = ('Lyso',)): PI_Negative, lipproc.Headgroup(main = 'PS'): PS_Negative, lipproc.Headgroup(main = 'PS', sub = ('Lyso',)): PS_Negative, lipproc.Headgroup(main = 'PG'): PG_Negative, lipproc.Headgroup(main = 'PG', sub = ('Lyso',)): PG_Negative, lipproc.Headgroup(main = 'BMP'): BMP_Negative, lipproc.Headgroup(main = 'PA'): PA_Negative, lipproc.Headgroup(main = 'PA', sub = ('Lyso',)): PA_Negative, lipproc.Headgroup(main = 'VA'): VA_Negative, lipproc.Headgroup(main = 'Cer'): Cer_Negative, lipproc.Headgroup(main = 'Cer', sub = ('1P',)): Cer_Negative, lipproc.Headgroup(main = 'SM'): Cer_Negative, lipproc.Headgroup(main = 'Cer', sub = ('Hex',)): Cer_Negative, lipproc.Headgroup(main = 'Cer', sub = ('Hex2',)): Cer_Negative, lipproc.Headgroup(main = 'Cer', sub = ('SHex',)): Cer_Negative, lipproc.Headgroup(main = 'Cer', sub = ('SHex2',)): Cer_Negative, lipproc.Headgroup(main = 'Cer', sub = ('PE',)): Cer_Negative, lipproc.Headgroup(main = 'Sph'): Cer_Negative, lipproc.Headgroup(main = 'Sph', sub = ('1P',)): Cer_Negative, }, 'pos': { lipproc.Headgroup(main = 'FA'): FA_Positive, lipproc.Headgroup(main = 'DAG'): DAG_Positive, lipproc.Headgroup(main = 'DGTA'): GL_Positive, lipproc.Headgroup(main = 'DGTS'): GL_Positive, lipproc.Headgroup(main = 'DGCC'): GL_Positive, lipproc.Headgroup(main = 'SQDG'): GL_Positive, lipproc.Headgroup(main = 'MGDG'): GL_Positive, lipproc.Headgroup(main = 'DGDG'): GL_Positive, lipproc.Headgroup(main = 'DGTA', sub = ('Lyso',)): GL_Positive, lipproc.Headgroup(main = 'DGTS', sub = ('Lyso',)): GL_Positive, lipproc.Headgroup(main = 'DGCC', sub = ('Lyso',)): GL_Positive, lipproc.Headgroup(main = 'SQDG', sub = ('Lyso',)): GL_Positive, lipproc.Headgroup(main = 'MGDG', sub = ('Lyso',)): GL_Positive, lipproc.Headgroup(main = 'DGDG', sub = ('Lyso',)): GL_Positive, lipproc.Headgroup(main = 'TAG'): TAG_Positive, lipproc.Headgroup(main = 'PE'): PE_Positive, lipproc.Headgroup(main = 'PE', sub = ('Lyso',)): LysoPE_Positive, lipproc.Headgroup(main = 'PC'): PC_Positive, lipproc.Headgroup(main = 'PC', sub = ('Lyso',)): LysoPC_Positive, lipproc.Headgroup(main = 'PI'): PI_Positive, lipproc.Headgroup(main = 'PI', sub = ('Lyso',)): PI_Positive, lipproc.Headgroup(main = 'PS'): PS_Positive, lipproc.Headgroup(main = 'PS', sub = ('Lyso',)): PS_Positive, lipproc.Headgroup(main = 'PG'): PG_Positive, lipproc.Headgroup(main = 'PG', sub = ('Lyso',)): PG_Positive, lipproc.Headgroup(main = 'BMP'): BMP_Positive, lipproc.Headgroup(main = 'PA'): PA_Positive, lipproc.Headgroup(main = 'PA', sub = ('Lyso',)): PA_Positive, lipproc.Headgroup(main = 'VA'): VA_Positive, lipproc.Headgroup(main = 'Cer'): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('1P',)): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('Hex',)): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('Hex2',)): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('SHex',)): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('SHex2',)): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('PE',)): Cer_Positive, lipproc.Headgroup(main = 'SM'): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('1P', 'Lyso')): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('Hex', 'Lyso')): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('Hex2', 'Lyso')): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('SHex', 'Lyso')): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('SHex2', 'Lyso')): Cer_Positive, lipproc.Headgroup(main = 'Cer', sub = ('PE', 'Lyso')): Cer_Positive, lipproc.Headgroup(main = 'SM', sub = ('Lyso',)): Cer_Positive, lipproc.Headgroup(main = 'Sph'): Cer_Positive, lipproc.Headgroup(main = 'Sph', sub = ('1P',)): Cer_Positive, lipproc.Headgroup(main = 'Sph', sub = ('M1',)): Cer_Positive, lipproc.Headgroup(main = 'Sph', sub = ('M2',)): Cer_Positive, lipproc.Headgroup(main = 'Sph', sub = ('M3',)): Cer_Positive, } }
[docs]class MS2Feature(object): scan_methods = { 'mgf': 'mgf_iterscans', 'mzml': 'mzml_iterscans', } def __init__( self, mz, ionmode, resources, rt, ms1_records = None, rt_range = .5, check_rt = True, ): """ Collects the MS2 scans from the provided resources for a single feature. Calls identification methods on all scans collected. :param float mz: m/z value of the precursor ion. :param str ionmode: Ion mode of the experiment. Either ``pos`` or ``neg``. :param dict resources: ``dict`` of MS2 scan resources. These are either ``mgf.MgfReader`` objects or paths to MGF files. Later more resource types will be available, for example MzML format. Keys of the ``dict`` are used as sample labels. Thes can be strings or tuples. :param dict ms1_records: A data structure resulted by ``moldb.adduct_lookup``. If ``None`` the lookup will be done here. :param float rt_range: If a single retention time value provided this is the largest accepted difference between an MS2 scan's RT and the precursor's RT. E.g. if ``rt = 8.3`` and ``rt_range = 0.5``, scans between 7.8 and 8.8 will be considered. If a tuple of floats provided for RT, scans between these two values will be considered. :param bool check_rt: Check if the retention time of the scan is enough close to the precursor's RT. If ``False``, scans will be matched only by the m/z value of the precursor and scans with any large RT difference will be analysed. """ self.mz = mz self.ionmode = ionmode self.ms1_records = ms1_records or moldb.adduct_lookup(mz, ionmode) self.resources = resources self.rt = (rt - rt_range, rt + rt_range) if type(rt) is float else rt self.rtmean = sum(self.rt) / 2.0 self.rt_range = rt_range self.check_rt = check_rt
[docs] def main(self): self.ms1_lookup() self.build_scans() self.identify()
[docs] def iterscans(self): for sample_id, resource in iteritems(self.resources): res_type = self.guess_resouce_type(resource) if res_type not in self.scan_methods: raise ValueError( 'Unknown MS2 resource type: %s' % str(resource) ) scan_method = getattr(self, self.scan_methods[res_type]) for scan in scan_method(resource, sample_id): yield scan
[docs] def mgf_iterscans(self, mgf_resource, sample_id = None): if isinstance(mgf_resource, basestring): mgffile = mgf.MgfReader(mgfname, charge = None) elif isinstance(mgf_resource, mgf.MgfReader): mgffile = mgf_resource else: raise ValueError( 'Mgf files should be lipyd.mgf.MgfReader ' 'instances of file names.' ) idx, rtdiff = mgffile.lookup(self.mz, rt = self.rtmean) for i, rtd in zip(idx, rtdiff): if self.check_rt: scan_rt = self.rtmean + rtd if scan_rt < self.rt[0] or scan_rt > self.rt[1]: continue sc = mgffile.get_scan(i) yield Scan( mzs = sc[:,0], intensities = sc[:,1], ionmode = self.ionmode, precursor = self.mz, ms1_records = self.ms1_records, scan_id = mgffile.mgfindex[i,3], sample_id = sample_id, source = mgffile.fname, deltart = rtd, rt = mgffile.mgfindex[i,2], )
[docs] def mzml_iterscans(self, mzml_resource, sample_id = None): raise NotImplementedError
[docs] @staticmethod def guess_resouce_type(res): if isinstance(res, basestring) and os.path.exists(res): if res[-3:].lower() == 'mgf': return 'mgf' elif isinstance(res, mgf.MgfReader): return 'mgf'
[docs] def build_scans(self): self.scans = np.array(list(self.iterscans())) self.deltart = np.array([sc.rt - self.rtmean for sc in self.scans]) rtsort = [ it[0] for it in sorted( (it for it in enumerate(self.deltart)), key = lambda it: abs(it[1]) ) ] self.scans = self.scans[rtsort] self.deltart = self.deltart[rtsort]
[docs] def identify(self): self.identities = [] for scan in self.scans: identity = scan.identify() if identity: self.identities.append(identity)
[docs] def identity_summary( self, scores = True, drt = True, sample_ids = False, scan_ids = False, ): identities = set() for i, scan_i in enumerate(self.identities): for sum_str, varieties in iteritems(scan_i): for var in varieties: summary = [var.__str__()] if scores: summary.append(var.score_pct) else: if var.score == 0: continue if drt: summary.append(self.deltart[i]) if sample_ids: summary.append(self.scans[i].sample_id) if scan_ids: summary.append(self.scans[i].scan_id) summary.append(sum_str) identities.add(tuple(summary)) return identities
[docs] def ms1_lookup(self): if self.ms1_records is None: self.ms1_records = moldb.adduct_lookup(self.mz)
##############################################################################
[docs]class MS2FeatureOld(object): """ Provides additional, more sophisticated methods for identification of a single feature. In the original concept all methods for identification based on MS1 and MS2 took place in class Screening(), as those could simply iterate through the arrays. Later more complex methods became necessary, so I created this class to group them. """ def __init__(self, main, protein, mode, oi, log = True): """ @main : ltp.Screening() instance One Screening() instance with MS1 and MS2 processing already done. @protein : str Protein name @mode : str MS mode (`pos` or `neg`) @oi : int Original index of one feature. @log : bool Whether output verbose messages to logfile. """ self.main = main self.log = log self.protein = protein self.mode = mode self.oi = oi self.ifracs = self.main.fraction_indices(self.protein) self.fracsi = dict(map(lambda fr: (fr[1][0], fr[0]), iteritems(self.ifracs))) self.tbl = self.main.valids[self.protein][self.mode] self.ms2 = self.tbl['ms2'][self.oi] self.i = self.main.oi2i(self.protein, self.mode, self.oi) self.fa = {} self.scans_fractions = map( lambda tpl: tuple(map(int, tpl)), uniqList( map( tuple, # scan ID, fraction ID self.ms2[:,[12,14]] ) ) ) self.classes = ['PA', 'PC', 'PE', 'PG', 'PS'] self.classes2 = ['PA', 'PC', 'PE', 'PG', 'PS', 'PI', 'SM', 'BMP', 'Cer', 'Cer1P', 'HexCer', 'HexCerOH', 'DAG', 'TAG', 'FA', 'VA', 'LysoPE', 'LysoPC'] self.identities = set([]) self.identities2 = {} # get carbon counts from MS1 self.ms1fa = self.tbl['ms1fa'][oi] # sorting by fractions/scans self.scans = dict( map( lambda sc_fr: ( # scan ID, fraction ID: key (sc_fr[0], sc_fr[1]), # MS2 array slice: value self.ms2[ np.where( np.logical_and( self.ms2[:,12] == sc_fr[0], self.ms2[:,14] == sc_fr[1] ) ) ] ), self.scans_fractions ) ) # sorting by intensity desc self.scans = dict( map( lambda i: ( i[0], i[1][i[1][:,2].argsort()[::-1],:] ), iteritems(self.scans) ) ) self.deltart = dict( map( lambda i: ( i[0], self.tbl['rtm'][self.i] - i[1][0,11] ), iteritems(self.scans) ) ) self._scans = dict( map( lambda i: ( i[0], # i[0]: (scan ID, fraction ID) # i[1]: MS2 array slice MS2Scan(i[1], i[0], self) ), iteritems(self.scans) ) ) self.maxins = dict( map( lambda i: ( i[0], i[1][0,2] ), iteritems(self.scans) ) ) self.medins = dict( map( lambda i: ( i[0], np.median(i[1][:,2]) ), iteritems(self.scans) ) ) self.sort_scans() self.select_best_scan() self.msg('\n::: Analysing feature: %s :: %s :: index = %u ::'\ ' m/z = %.03f :: number of MS2 scans: %u\n' % \ (self.protein, self.mode, self.oi, self.tbl['mz'][self.i], len(self._scans)) ) self.msg('\n::: Database lookup resulted '\ 'the following species: %s\n' % self.print_db_species()) self.msg('\n::: Intensities:\n%s%s\n' % \ (' ' * 24, ' '.join(['A09', 'A10', 'A11', 'A12', 'B01']))) self.msg('%s%s' % (' ' * 16, '=' * 63)) self.msg('\n - absolute: %s' % ' '.join( map(lambda x: '%10.01f' % x, self.tbl['fe'][self.i,:])) ) self.msg('\n - relative: %s\n' % \ ' '.join( map( lambda xx: '%10.02f%%' % (xx * 100.0), map( lambda x: x / np.nanmax(self.tbl['fe'][self.i,:]), self.tbl['fe'][self.i,:] ) ) ) ) self.msg('\n::: MS2 scans available (%u):\n\n' % len(self.scans)) for sc in self._scans.values(): sc.print_scan()
[docs] def sort_scans(self): """ Groups the scans in 3 groups: highest consists of those from the fractions with the highest protein level (there might be more than one the highest, because the fraction offset limits); the secondary contains scans from other protein containing fractions; while the other contains the scans from non protein containing fractions. Within the groups the scans are sorted from lowest to highest deltaRT. """ self.highest = [] self.secondary = [] self.other = [] with_protein = self.main.protein_containing_fractions(self.protein) for scan_num, fr in self.scans.keys(): fr_name = 'a%u' % fr if fr != 13 and fr != 1 else 'b1' if fr_name in with_protein: if fr_name == self.main.fracs_orderL[self.protein][0][0] or \ fr_name == self.main.fracs_orderU[self.protein][0][0]: self.highest.append((scan_num, fr)) else: self.secondary.append((scan_num, fr)) else: self.other.append((scan_num, fr)) self.highest = sorted(self.highest, key = lambda sc: abs(self._scans[sc].deltart)) self.secondary = sorted(self.secondary, key = lambda sc: abs(self._scans[sc].deltart)) self.other = sorted(self.other, key = lambda sc: abs(self._scans[sc].deltart))
[docs] def select_best_scan(self): self.best_scan = \ self.highest[0] if len(self.highest) else \ self.secondary[0] if len(self.secondary) else \ self.other[0] if len(self.other) else \ None
[docs] def print_db_species(self): return ', '.join( map( lambda hg: '%s' % ( hg \ if hg not in self.tbl['ms1fa'][self.oi] \ or not len(self.tbl['ms1fa'][self.oi][hg]) \ else \ ', '.join( map( lambda fa: '%s(%s)' % (hg, fa), self.tbl['ms1fa'][self.oi][hg] ) ) ), self.tbl['ms1hg'][self.oi] ) ) \ if len(self.tbl['ms1hg'][self.oi]) \ else 'none'
[docs] def reload(self, children = False): modname = self.__class__.__module__ mod = __import__(modname, fromlist=[modname.split('.')[0]]) imp.reload(mod) new = getattr(mod, self.__class__.__name__) setattr(self, '__class__', new) if children: for sc in self._scans.values(): sc.reload()
def __str__(self): return ', '.join( map( lambda hgfas: ', '.join( map( lambda fa: '%s(%s)' % (hgfas[0], fa), hgfas[1] ) ), iteritems(self.fa) ) )
[docs] def get_header_div(self): return '\t\t<div class="ms2hdr">\n\t\t'\ 'MS2 scans of feature %.04f'\ '\n\t\t\t|<span class="scansbutton morescans1"'\ ' title="Show/hide scans from fractions with '\ 'highest protein concentration">scans+</span>\n'\ '\n\t\t\t|<span class="scansbutton morescans2"'\ ' title="Show/hide scans from other protein '\ 'containing fractions">scans++</span>\n'\ '\n\t\t\t|<span class="scansbutton morescans3"'\ ' title="Show/hide scans from non protein '\ 'containing fractions">scans+++</span>\n'\ '\n\t\t\t|<span class="scansbutton morefrags"'\ ' title="Show/hide fragments after 5 highest'\ '">frags+</span>\n'\ '\n\t\t\t|<span class="scansbutton remove"'\ ' title="Remove scans of this feature'\ '">remove</span>\n'\ '\t\t</div>\n' % \ self.tbl['mz'][self.i]
[docs] def html_table(self): container = '\t<div id="%s" class="ms2tblcontainer">\n%s%s\n\t</div>' header = self.get_header_div() html = [] if self.best_scan is not None: html.append(self._scans[self.best_scan].html_table()) else: html.append('<div class="noscans">No scans '\ 'from fractions with highest protein concentration.</div>') for sc in sorted(self._scans.values(), key = lambda sc: abs(sc.deltart)): if sc.in_primary and sc.scan_id != self.best_scan: html.append(sc.html_table()) for sc in sorted(self._scans.values(), key = lambda sc: abs(sc.deltart)): if not sc.in_primary and sc.scan_id != self.best_scan: html.append(sc.html_table()) for sc in sorted(self._scans.values(), key = lambda sc: abs(sc.deltart)): if not sc.in_primary and not sc.in_secondary: html.append(sc.html_table()) html = '\n'.join(html) return container % ('ms2c_%u_%u' % \ (int(self.tbl['aaa'][self.i]), self.oi), header, html)
[docs] def html_table_b64(self): return base64.encodestring(self.html_table()).replace('\n', '')
[docs] def msg(self, text): if self.log: with open(self.main.ms2log, 'a') as f: f.write(text)
def _any_scan(self, method, **kwargs): for i, sc in iteritems(self._scans): self.msg('\t\t:: Calling method %s() on scan #%u\n' % (method, i[0])) if getattr(sc, method)(**kwargs): return True return False
[docs] def identify(self): for hg in self.classes: self.msg('\t>>> Attempting to identify %s in all scans\n' % (hg)) if self._any_scan('is_%s' % hg.lower()): self.identities.add(hg) self.msg('\t<<< Result: identified as %s\n' % hg) else: self.msg('\t<<< Result: not %s\n' % hg)
[docs] def identify2(self, num = 1): for scanid, scan in iteritems(self._scans): for hg in self.classes2: self.msg('\t>>> Attempting to identify %s in scan %u\n' % (hg, scanid[0])) identified = False if hg not in self.identities2: self.identities2[hg] = [] method = '%s_%s_%u' % (hg.lower(), self.mode, num) if hasattr(scan, method): self.identities2[hg].append(getattr(scan, method)()) identified = any( map( lambda i: i['score'] >= 5, self.identities2[hg] ) ) if identified: self.msg('\t<<< Result: identified as %s\n' % hg) else: self.msg('\t<<< Result: not %s\n' % hg) if hasattr(scan, 'fa_co_2'): del scan.fa_co_2 if hasattr(scan, 'fa_list'): scan.fa_list = None
[docs]class MS2Scan(object): """ This class represents one MS2 scan and provides methods for its analysis. """ def __init__(self, scan, scan_id, feature): self.scan = scan self.scan_id = scan_id self.feature = feature self.deltart = self.feature.deltart[self.scan_id] self.frac_id = self.scan_id[1] self.frac_name = self.feature.fracsi[self.frac_id] self.ms2_file = self.feature.main.ms2files\ [self.feature.protein][self.feature.mode][self.frac_name] self.in_primary = self.frac_name in \ self.feature.main.fracs_order[self.feature.protein]['prim'] self.in_secondary = self.frac_name in \ self.feature.main.fracs_order[self.feature.protein]['sec'] self.i = self.feature.i self.tbl = self.feature.tbl self.insmax = self.scan[0,2] self.recc = re.compile(r'.*?([0-9]{1,2}):([0-9]).*') self.fa = {} self.fa1 = {} self._order = None self.sort_by_i() self.fa_list = None self.build_fa_list()
[docs] def reload(self): modname = self.__class__.__module__ mod = __import__(modname, fromlist=[modname.split('.')[0]]) imp.reload(mod) new = getattr(mod, self.__class__.__name__) setattr(self, '__class__', new)
[docs] def print_identities(self, fname = None): """ Prints identities to standard output or file. """ if fname is None: sys.stdout.write(self.identities_str()) else: with open(fname, 'w') as fp: fp.write(self.identities_str())
[docs] def identities_str(self, num = 1): """ Returns table of all identification attempts as string. """ result = ['=== Scan #%u (fraction %s) ===' % ( self.scan_id[0], self.frac_name)] for hg in self.feature.classes2: method = '%s_%s_%u' % ( hg.lower(), self.feature.mode, num ) if not hasattr(self, method): continue idd = getattr(self, method)() result.append('%s\t%u\t%s' % ( hg, idd['score'], ', '.join(idd['fattya']) )) return '%s\n' % '\n'.join(result)
[docs] def print_scan(self): """ Prints the list of fragments as an annotated table. """ self.feature.msg(self.scan_str())
[docs] def show(self): """ Prints the scan table to standard output. """ sys.stdout.write(self.scan_str())
[docs] def scan_str(self): """ Returns the scan table as string. """ ms1mz = self.tbl['mz'][self.i] header = '\tFrag. m/z\tIntensity\tIdentity%sNL mass\n'\ '\t%s\n' % (' ' * 26, '=' * 73) table = '\n\t'.join( map( lambda sc: '%9.4f\t%10.2f\t%s%s%9.4f' % \ tuple(list(sc[[1, 2, 7]]) + \ [' ' * (32 - len(sc[7])), ms1mz - sc[1]]), self.scan ) ) fri = self.scan_id[1] - 9 if self.scan_id[1] != 1 else 4 return ( '\tScan %u (fraction %s%u; %s %s; '\ 'intensity = %.01f (%.02f%%)):\n\n%s\t%s\n\n' % \ (self.scan_id[0], self.frac_name, self.frac_id, 'contains' \ if self.feature.ifracs[self.frac_name][1] \ else 'does not contain', self.feature.protein, self.tbl['fe'][self.i, self.frac_id] \ if self.frac_id < self.tbl['fe'].shape[1] else np.nan, (self.tbl['fe'][self.i, self.frac_id] \ if self.frac_id < self.tbl['fe'].shape[1] else np.nan) / \ np.nanmax(self.tbl['fe'][self.i, :]) * 100.0, header, table) )
[docs] def html_table(self): table = '\t\t<table id="%s" class="scantbl %s">\n%s\n\t\t</table>\n' th = '\t\t\t\t<th>\n\t\t\t\t\t%s\n\t\t\t\t</th>\n' ttl = '\t\t\t<tr class="%s">\n\t\t\t\t<th colspan="4">\n\t\t\t\t\t%s'\ '\n\t\t\t\t</th>\n\t\t\t</tr>\n' tr = '\t\t\t<tr class="%s">\n%s\n\t\t\t</tr>\n' td = '\t\t\t\t<td>\n\t\t\t\t\t%s\n\t\t\t\t</td>\n' ms1mz = self.tbl['mz'][self.i] rows = ttl % ( 'scantitle', 'Scan %u (%s, %s; '\ 'intensity = %.01f (%.02f%%); dRT = %.03f min)' % ( self.scan_id[0], self.frac_name, 'the highest fraction' if self.in_primary \ else 'not the highest, but contains %s' % \ self.feature.protein if self.in_secondary \ else 'does not contain %s' % \ self.feature.protein, self.tbl['fe'][self.i, self.frac_id] \ if fri < self.tbl['fe'].shape[1] else np.nan, (self.tbl['fe'][self.i, self.frac_id] \ if fri < self.tbl['fe'].shape[1] else np.nan) / \ np.nanmax(self.tbl['fe'][self.i, :]) * 100.0, self.deltart ) ) rows += tr % ( 'scanhdr', ''.join( map( lambda cname: th % cname, ['Frag m/z', 'Intensity', 'Identity', 'NL mass'] ) ) ) for rn, row in enumerate(self.scan): rows += tr % ( 'fragrow %s' % ('first5' if rn < 5 else 'after5'), ''.join([ td % ('%.04f' % row[1]), td % ('%.02f' % row[2]), td % row[7], td % ('%.04f' % (ms1mz - row[1])) ]) ) return table % ('%u_%u_%u' % ( self.tbl['i'][self.i], self.scan_id[0], self.scan_id[1]), 'best' if self.scan_id == self.feature.best_scan \ else 'primary' if self.in_primary \ else 'secondary' if self.in_secondary \ else 'noprotein', rows )
[docs] def get_by_rank(self, rank = 1, min_mz = 0.0): this_rank = 0 return_next = False prev_mz = 0.0 intensity = '' ids = [] for r in self.scan: if r[1] < min_mz: continue if abs(r[1] - prev_mz) > 0.0001: prev_mz = r[1] this_rank += 1 if this_rank == rank: return_next = True intensity = '%.04f(%u)' % (r[1], r[2]) ids.append('%s (%.03f)' % (r[7], r[1])) elif this_rank != rank and return_next: return intensity, '; '.join(ids) return '', ''
[docs] def full_list_str(self): result = [] prev_mz = self.scan[0,1] intensity = self.scan[0,2] names = set([]) for i, r in enumerate(self.scan): if abs(r[1] - prev_mz) > 0.0001: if len(names) == 1 and list(names)[0] == 'unknown': result.append('%s (%.03f) (%u)' % ('/'.join(sorted(list(names))), r[1], intensity)) else: result.append('%s (%u)' % ('/'.join(sorted(list(names))), intensity)) names = set([]) intensity = r[2] prev_mz = r[1] names.add(r[7]) result.append('%s (%u)' % ('/'.join(sorted(list(names))), intensity)) return '; '.join(result)
[docs] def most_abundant_mz(self): result = self.scan[0,1] self.feature.msg('\t\t -- Most abundant m/z is %.03f\n' % result) return result
[docs] def mz_match(self, mz_detected, mz): return abs(mz_detected - mz) <= self.feature.main.ms2_tlr
[docs] def sort_by_mz(self): """ Sorts the scan array by m/z increasing. """ self._order = self._order[self.scan[:,1].argsort()] self.scan = self.scan[self.scan[:,1].argsort(),:]
[docs] def sort_by_i(self, return_order = False): """ Sorts the scan array by intensity decreasing. """ if self._order is None: order = self.scan[:,2].argsort()[::-1] self.scan = self.scan[order,:] self._order = np.array(xrange(self.scan.shape[0]), dtype = np.int) else: order = self._order.argsort() self.scan = self.scan[order,:] self._order = self._order[order] if return_order: return order
[docs] def mz_lookup(self, mz): """ Returns the index of the closest m/z value detected in the scan if it is within the range of tolerance, otherwise None. """ du = 999.0 dl = 999.0 self.sort_by_mz() ui = self.scan[:,1].searchsorted(mz) if ui < self.scan.shape[0]: du = self.scan[ui,1] - mz if ui > 0: dl = mz - self.scan[ui - 1,1] i = ui if du < dl else ui - 1 i = i if self.mz_match(self.scan[i,1], mz) else None sort = self.sort_by_i(return_order = True) if i is not None: i = np.where(sort == i)[0][0] return i
[docs] def has_mz(self, mz): """ Tells if an m/z exists in this scan. """ result = self.mz_lookup(mz) is not None self.feature.msg('\t\t -- m/z %.03f occures in this scan? -- %s\n' % \ (mz, str(result))) return result
[docs] def has_nl(self, nl): """ Tells if a neutral loss exists in this scan. """ result = self.has_mz(self.ms1_mz() - nl) self.feature.msg('\t\t -- neutral loss of %.03f occures in '\ 'this scan? Looked up m/z %.03f - %.03f = %.03f -- %s\n' % \ (nl, self.feature.tbl['mz'][self.feature.i], nl, self.feature.tbl['mz'][self.feature.i] - nl, str(result))) return result
[docs] def ms1_mz(self): """ Returns the MS1 m/z (which should be the precursor ion). """ return self.feature.tbl['mz'][self.feature.i]
[docs] def nl_lookup(self, nl): """ Looks up if a neutral loss exists in this scan and returns its index. """ return self.mz_lookup(self.feature.tbl['mz'][self.feature.i] - nl)
[docs] def most_abundant_mz_is(self, mz): """ Tells if the m/z with the highest intensity is `mz`. """ result = self.mz_match(self.most_abundant_mz(), mz) self.feature.msg('\t\t -- m/z %.03f is the most abundant? -- %s\n' % \ (mz, str(result))) return result
[docs] def mz_among_most_abundant(self, mz, n = 2): """ Tells if an m/z is among the most aboundant `n` fragments in a spectrum. :param float mz: The m/z value. :param int n: The number of most abundant fragments considered. """ result = False for i in xrange(min(n, self.scan.shape[0])): if self.mz_match(self.scan[i,1], mz): result = True break self.feature.msg('\t\t -- m/z %.03f is among the %u most abundant? -- '\ '%s\n' % (mz, n, str(result))) return result
[docs] def nl_among_most_abundant(self, nl, n = 2): """ Tells if a neutral loss corresponds to one of the most aboundant `n` fragments in a spectrum. :param float nl: The mass of the neutral loss. :param int n: The number of most abundant fragments considered. """ result = False for i in xrange(min(n, self.scan.shape[0])): if self.mz_match(self.scan[i,1], self.ms1_mz() - nl): result = True break self.feature.msg('\t\t -- neutral loss %.03f is among '\ 'the %u most abundant? -- '\ '%s\n' % (nl, n, str(result))) return result
[docs] def get_intensity(self, mz): """ Returns the intensity of a fragment ion from its m/z. Value is `None` if m/z does not present. """ i = self.mz_lookup(mz) if i is not None: return self.intensities[i,2] return None
[docs] def get_nl_intensity(self, nl): """ Returns the intensity of a fragment ion from its a neutral loss. Value is `None` if neutral loss does not present. """ return self.get_intensity(self.ms1_mz() - nl)
[docs] def mz_percent_of_most_abundant(self, mz, percent = 80.0): """ Tells if an m/z has at least certain percent of intensity compared to the most intensive fragment. :param float mz: The m/z value. :param float percent: The threshold in percent of the highest intensity. """ insmax = self.scan[0,2] result = False for frag in self.scan: if self.mz_match(frag[1], mz): result = True break if frag[2] < insmax * 100.0 / percent: result = False break self.feature.msg('\t\t -- m/z %.03f has abundance at least %.01f %% of'\ ' the highest abundance? -- %s\n' % \ (mz, percent, str(result))) return result
[docs] def fa_type_is(self, i, fa_type, sphingo = False, uns = None, scan_index = True): """ Tells if a fatty acid fragment is a specified type. The type should be a part of the string representation of the fragment, e.g. `-O]` for fragments with one oxygen loss. """ ifa = None if not scan_index: ifa = i i = self.fa_list[ifa][5] result = ( (fa_type in self.scan[i,8] or fa_type in self.scan[i,7]) and (not sphingo or 'Sphingosine' in self.scan[i,7]) and (uns is None or ifa is None or self.fa_list[ifa][0][1] <= uns) ) self.feature.msg('\t\t -- Fragment #%u (%s, %s): fatty acid type '\ 'is %s? -- %s\n' % \ (i, self.scan[i,7], self.scan[i,8], fa_type, str(result))) return result
[docs] def is_fa(self, i, sphingo = False): """ Examines whether a fragment is fatty acid-like or not. In the labels of fatty acid fragments we always """ result = 'FA' in self.scan[i,7] or 'Lyso' in self.scan[i,7] or \ (sphingo and 'Sphi' in self.scan[i,7]) self.feature.msg('\t\t -- Fragment #%u (%s): is fatty acid? '\ '-- %s\n' % (i, self.scan[i,7], str(result))) return result
[docs] def most_abundant_fa(self, fa_type, head = 1, sphingo = False): """ Returns `True` if there is a fatty acid among the most abundant fragments and it is of the defined type; `False` if there is no fatty acid, or it is different type. :param str fa_type: The type of the fatty acid fragment ion. :param int head: The number of most abundant fragments considered. :param bool sphingo: Look for a sphingolipid backbone. """ result = False for i in xrange(self.scan.shape[0]): if i == head: break if self.is_fa(i, sphingo = sphingo): result = self.fa_type_is(i, fa_type, sphingo = sphingo) self.feature.msg('\t\t -- Having fatty acid %s among %u most abundant '\ 'features? -- %s\n' % (fa_type, head, str(result))) return result
[docs] def get_most_abundant_fa(self, fa_type = None, head = 1, sphingo = False): """ Looks up the most abundant fatty acid fragment of the given type. Returns tuple with mz, intensity, carbon count and unsaturation, index. """ self.build_fa_list() for fa_frag in self.fa_list[:head]: if ( fa_type is None or self.fa_type_is(fa_frag[5], fa_type, sphingo) ): return self.scan[fa_frag[5],1], fa_frag[4], fa_frag[0], fa_frag[5] return None, None, None, None
[docs] def fa_cc_among_most_abundant(self, cc, hg, n = 2, sphingo = False): """ Returns `True` if there is one fatty acid with the defined carbon count and unsaturation and compatible with the given headgroup among the most abundant `n` fragments. """ self.build_fa_list() for fa_frag in self.fa_list: if fa_frag[5] >= n: break if ( fa_frag[0] == cc and (fa_frag[1] is None or hg in fa_frag[1]) and (sphingo or fa_frag[3]) ): return True return False
[docs] def fa_among_most_abundant(self, fa_type, n = 2, min_mass = None, sphingo = False, uns = None): """ Returns `True` if there is one of the defined type of fatty acid fragments among the given number of most abundant fragments, and it has a mass greater than the given threhold. """ self.build_fa_list() result = False for i, fa in enumerate(self.fa_list): if not sphingo or fa[3] and ( min_mass is None or self.scan[fa[5],1] >= min_mass ): if min_mass is not None: self.feature.msg('\t\t\t-- Fragment #%u having mass larger '\ 'than %.01f\n' % (i, min_mass)) if self.fa_type_is(i, fa_type, sphingo = sphingo, uns = uns, scan_index = False): result = True if i == n: break elif min_mass is not None: self.feature.msg('\t\t\t-- Fragment #%u having mass lower '\ 'than %.01f\n' % (i, min_mass)) self.feature.msg('\t\t -- Having fatty acid fragment %s among %u most '\ 'abundant -- %s\n' % (fa_type, n, str(result))) return result
[docs] def fa_percent_of_most_abundant(self, fa_type, percent = 80.0, sphingo = False): for i in xrange(self.scan.shape[0]): if self.is_fa(i, sphingo = sphingo): if self.fa_type_is(i, fa_type, sphingo = sphingo): return True if self.scan[i,2] < self.insmax * 100.0 / percent: return False return False
[docs] def mz_most_abundant_fold(self, mz, fold): """ Tells if an m/z is the most abundant fragment and it has at least a certain fold higher intensity than any other fragment. :param float mz: The m/z value. :param float fold: The m/z must be this times higher than any other. """ result = False if self.most_abundant_mz_is(mz): result = self.scan.shape[0] == 1 or \ self.scan[1,2] * fold <= self.scan[0,2] self.feature.msg('\t\t -- m/z %.03f is at least %u times higher than '\ 'any other? -- %s\n' % (mz, fold, str(result))) return result
[docs] def sum_cc_is(self, cc1, cc2, cc): """ Returns `True` if the sum of the 2 carbon counts and unsaturations is equal with the third one. :param tuple cc1: Carbon count and unsaturation 1. :param tuple cc2: Carbon count and unsaturation 2. :param str cc: Expected total carbon count and unsaturation. """ return self.cc2str(self.sum_cc([cc1, cc2])) == cc
[docs] def cer_fa_test(self, frag1, frag2): return \ self.fa_type_is(frag1[5], 'CerFA(') and \ self.fa_type_is(frag2[5], 'CerSphi-N(') and \ frag1[4] > frag2[4] * 2
[docs] def fa_combinations3(self, hg, head = None, expected_intensities = None): """ Finds all combinations of 3 fatty acids which match the total carbon count and unsaturation resulted by database lookups of the MS1 precursor mass. This can be used for example at TAG. :param str hg: The short name of the headgroup, e.g. `TAG`. :param int head: If `None` or `numpy.inf` all fragment ions will be considered, otherwise only the first most aboundant until the number `head`. """ result = set([]) if hg in self.feature.ms1fa and len(self.feature.ms1fa[hg]): ccs = list(self.feature.ms1fa[hg]) else: return result head = np.inf if head is None else head for cc in ccs: try: icc = self.cc2int(cc) except AttributeError: continue for frag0 in self.fa_list: if frag0[5] >= head: break cc0 = frag0[0] cc12e = '%u:%u' % tuple(map(lambda x: x[0] - x[1], zip(*[icc, cc0]))) cc12s = self.fa_combinations_tuples(cc12e, head = head, by_cc = True) for cc12 in cc12s: cc012 = '/'.join(sorted(list(cc12[0]) + [self.cc2str(cc0)])) if self.sum_cc_str(cc012) == icc: if self.intensity_ratios([ (cc12[1][0], cc12[2][0]), (cc12[1][1], cc12[2][1]), (frag0[4], frag0[5])], expected = expected_intensities ): result.add(cc012) return result
[docs] def intensity_ratios(self, intensities, expected = None, logbase = 1.5): """ Tells if the ratio of a list of intensities fits the one in `expected` or is even if `expected` is `None`. :param list intensities: List of tuples, first element is the intensity, the second is an uniqe identifier of the fragments. :param list expected: List with expected intensity proportions. E.g. `[1, 1, 2]` means the third ion is twice higher intense than the 2 others. :param int logbase: The fold difference tolerance when comparing intensities. E.g. if this is 2, then an almost twice less or more intense ion will considered to have similar intensity. """ if len(intensities) == 1: return True i = intensities if any(map(lambda ii: ii[0] <= 0.0, i)): return False # to know if one fragment contributes more than one times; # intensities divided by the times the fragment is incident cntr = collections.Counter(map(lambda ii: ii[1], i)) # by default expecting more or less equal intensities if expected is None: expected = [1.0] * len(i) i = list( map( lambda ii: (ii[1][0] / (expected[ii[0]] * cntr[ii[1][1]]), ii[1][1]), enumerate(i) ) ) return ( all( map( lambda co: ( (math.log(co[0][0], logbase) - math.log(co[1][0], logbase)) <= 1 ), itertools.combinations(i, 2) ) ) )
[docs] def fa_combinations_old(self, hg, sphingo = False, head = None, by_cc = False): """ Finds all combinations of 2 fatty acids which match the total carbon count and unsaturation resulted by database lookups of the MS1 precursor mass. Alternatively a carbon count and unsaturation can be provided if `by_cc` is set to `True`. :param str hg: Short name of the headgroup, e.g. `PC`; or cc:unsat e.g. `32:1` if `by_cc` is `True`. :param bool sphingo: Assume sphingolipid. :param int head: If `None` the total fragment list used, if a number, only the most intensive fragments accordingly. :param bool by_cc: Use the MS1 database identification to find out the possible carbon counts and unsaturations for the given headgroup, or a cc:uns provided and search combinations accordingly. """ result = set([]) if hg in self.feature.ms1fa and len(self.feature.ms1fa[hg]): ccs = list(self.feature.ms1fa[hg]) elif by_cc: ccs = [hg] else: return result head = np.inf if head is None else head self.build_fa_list() for cc in ccs: for frag1 in self.fa_list: for frag2 in self.fa_list: result.update( self.get_fa_combinations(frag1, frag2, hg, cc, sphingo, head) ) return result
[docs] def fa_combinations_preprocess(self, regenerate = False): """ Generates a lookup table for all possible combinations of two fatty acids. """ if not hasattr(self, 'fa_co_2') or regenerate: self.fa_co_2 = {} l = self.fa_list for i, j in itertools.combinations_with_replacement( xrange(len(self.fa_list)), 2): key = self.sum_cc([(l[i][0][0], l[i][0][1]), (l[j][0][0], l[j][0][1])]) if key not in self.fa_co_2: self.fa_co_2[key] = set([]) self.fa_co_2[key].add((i, j))
[docs] def fa_combinations(self, hg, sphingo = False, head = None, by_cc = False): """ Finds all combinations of 2 fatty acids which match the total carbon count and unsaturation resulted by database lookups of the MS1 precursor mass. Alternatively a carbon count and unsaturation can be provided if `by_cc` is set to `True`. This method does the same as `fa_combinations` but works with a preprocessed lookup table. Returns set of strings. :param str hg: Short name of the headgroup, e.g. `PC`; or cc:unsat e.g. `32:1` if `by_cc` is `True`. :param bool sphingo: Assume sphingolipid. :param int head: If `None` the total fragment list used, if a number, only the most intensive fragments accordingly. :param bool by_cc: Use the MS1 database identification to find out the possible carbon counts and unsaturations for the given headgroup, or a cc:uns provided and search combinations accordingly. """ return set(map(lambda co: '%s/%s' % co[0], self.fa_combinations_tuples(hg, sphingo, head, by_cc)))
[docs] def fa_combinations_tuples(self, hg, sphingo = False, head = None, by_cc = False): """ Finds all combinations of 2 fatty acids which match the total carbon count and unsaturation resulted by database lookups of the MS1 precursor mass. Alternatively a carbon count and unsaturation can be provided if `by_cc` is set to `True`. Returns tuples of tuples with carbon count/unsaturation, intensities and indices. :param str hg: Short name of the headgroup, e.g. `PC`; or cc:unsat e.g. `32:1` if `by_cc` is `True`. :param bool sphingo: Assume sphingolipid. :param int head: If `None` the total fragment list used, if a number, only the most intensive fragments accordingly. :param bool by_cc: Use the MS1 database identification to find out the possible carbon counts and unsaturations for the given headgroup, or a cc:uns provided and search combinations accordingly. """ result = [] if hg in self.feature.ms1fa and len(self.feature.ms1fa[hg]): ccs = list(self.feature.ms1fa[hg]) elif by_cc: ccs = [hg] else: return result head = np.inf if head is None else head self.build_fa_list() self.fa_combinations_preprocess() for cc in ccs: icc = self.cc2int(cc) if icc in self.fa_co_2: for i, j in self.fa_co_2[icc]: frag1 = self.fa_list[i] frag2 = self.fa_list[j] result.extend( self.get_fa_combinations(frag1, frag2, hg, cc, sphingo, head) ) return result
[docs] def get_fa_combinations(self, frag1, frag2, hg, cc, sphingo, head): """ Processes two fatty acid fragments to decide if their combination is valid. """ result = [] if frag1[5] >= head or frag2[5] >= head: return result if hg == 'Cer' and not self.cer_fa_test(frag1, frag2): # where not the 'CerFA' is the most intensive # those are clearly false return result if frag1[0][0] is not None and frag2[0][0] is not None and \ (frag1[1] is None or hg in frag1[1]) and \ (frag2[1] is None or hg in frag2[1]) and \ (not sphingo or frag1[3] or frag2[3]): if self.sum_cc_is(frag1[0], frag2[0], cc): ether_1 = 'O-' if frag1[2] else '' ether_2 = 'O-' if frag2[2] else '' fa_1 = '%s%u:%u' % (ether_1, frag1[0][0], frag1[0][1]) fa_2 = '%s%u:%u' % (ether_2, frag2[0][0], frag2[0][1]) if frag1[3]: fa_1 = 'd%s' % fa_1 elif frag2[3]: sph = 'd%s' % fa_2 fa_2 = fa_1 fa_1 = sph if not frag1[3] and not frag2[3]: fa = tuple(sorted([fa_1, fa_2])) else: fa = (fa_1, fa_2) result.append(( fa, (frag1[4], frag2[4]), (frag1[5], frag2[5]) )) return result
[docs] def matching_fa_frags_of_type(self, hg, typ, sphingo = False, return_details = False): """ Returns carbon counts of those fragments which are of the given type and have complement fatty acid fragment of any type. Details is a dict with carbon counts as keys and fragment names as values. """ result = set([]) details = {} if hg in self.feature.ms1fa and len(self.feature.ms1fa[hg]): for cc in self.feature.ms1fa[hg]: self.build_fa_list() for frag1 in self.fa_list: for frag2 in self.fa_list: if frag1[0][0] is not None and \ frag2[0][0] is not None and \ (frag1[1] is None or hg in frag1[1]) and \ (frag2[1] is None or hg in frag2[1]) and \ (not sphingo or frag1[3]): if self.fa_type_is(frag1[5], typ) and \ self.sum_cc_is(frag1[0], frag2[0], cc): result.add(frag1[0]) if return_details: if frag1[0] not in details: details[frag1[0]] = set([]) details[frag1[0]].add(self.scan[frag2[5],7]) if return_details: return (result, details) else: return result
[docs] def cer_missing_fa(self, cer_hg): """ Infers the fatty acid carbon count and unsaturation by subtracting the sphingoid backbone from the total. This works with Cer, CerP and HexCer. """ result = set([]) cer_ccs = set([]) for frag in self.scan[:5]: if 'phingo' in frag[7]: cer_ccs.add(self.get_cc(frag[7])) if cer_hg in self.feature.ms1fa: for cc in self.feature.ms1fa[cer_hg]: cc = self.get_cc(cc) for cer_cc in cer_ccs: carb = cc[0] - cer_cc[0] unsat = cc[1] - cer_cc[1] result.add('d%u:%u/%u:%u' % ( cer_cc[0], cer_cc[1], carb, unsat)) return result
[docs] def cer_matching_fa(self, cer_fa): score = 0 if 'Cer' in self.feature.ms1fa: cer_cc = self.get_cc(cer_fa) for cc in self.feature.ms1fa['Cer']: cc = self.get_cc(cc) carb = cc[0] - cer_cc[0] unsat = cc[1] - cer_cc[1] + 2 if self.frag_name_present( '[FA-alkyl(C%u:%u)-H]-' % (carb, unsat)): score += 1 carb = cc[0] - cer_cc[0] - 2 unsat = cc[1] - cer_cc[1] + 1 if self.frag_name_present( '[FA-alkyl(C%u:%u)-H]-' % (carb, unsat)): score += 1 return score
[docs] def build_fa_list(self, rebuild = False): """ Returns list with elements: carbon count, headgroups (set or None), esther (False) or ether (True), sphingosine (True) or fatty acid (False), fragment intensity and row index """ if self.fa_list is None or rebuild: self.fa_list = [] for i, frag in enumerate(self.scan): if frag[7] != 'unknown' and self.is_fa(i, sphingo = True): cc = self.get_cc(frag[7]) hgs = self.get_hg(frag[7]) is_ether = 'alk' in frag[7] is_sphingo = 'Sphi' in frag[7] self.fa_list.append([cc, hgs, is_ether, is_sphingo, frag[2], i])
[docs] def get_hg(self, frag_name): hgfrags = self.feature.main.nHgfrags \ if self.feature.mode == 'neg' \ else self.feature.main.pHgfrags return hgfrags[frag_name] \ if frag_name in hgfrags and \ len(hgfrags[frag_name]) \ else None
[docs] def get_cc(self, fa): """ Extracts carbon count from any string, for example fatty acid names. Recognizes the pattern [number:number]. E.g. from `Phosphatidylcholine (36:1)` returns the tuple `(36, 1)`. To convert pure cc:uns strings, use the method `cc2int` instead, as that one is faster. :param str fa: Any string containing carbon count and unsaturation. """ m = self.recc.match(fa) if m is not None: return tuple(map(int, m.groups())) return (None, None)
[docs] def most_abundant_fa_cc(self, fa_type = None, head = 2): fa_cc = [] for i, frag in enumerate(self.scan): if i == head: break if self.is_fa(i) and ( fa_type is None or self.fa_type_is(i, fa_type) ): cc = self.get_cc(frag[7]) if cc[0] is not None: fa_cc.append((cc, frag[2])) return fa_cc
[docs] def cc2str(self, cc): """ Converts carbon count and unsaturation from tuple of integers to string. E.g. `(18, 1)` results `18:1`. :param tuple cc: Tuple of 2 integers representing carbon count and unsaturation. """ return '%u:%u' % cc
[docs] def ccs2str(self, ccs): """ Converts multiple carbon counts and unsaturations from tuples of integers format to string. E.g. `[(18, 1), (18, 0)]` results `18:1/18:0`. :param list ccs: List of tuples of integers. """ return '/'.join(map(self.cc2str, sorted(ccs)))
[docs] def cc2int(self, cc): """ Converts carbon count and unsaturation from string format to tuple of integers. E.g. `18:1` results `(18, 1)`. :param str cc: String representing carbon count and unsaturation separated by colon. """ return tuple(map(int, cc.split(':')))
[docs] def ccs2int(self, ccs): """ Converts a string of multiple carbon counts and unsaturations to a list of tuples of integers. :param str ccs: Multiple carbon counts and unsaturations in string representation, e.g. `18:1/16:0`. """ return list(map(self.cc2int, ccs.split('/')))
[docs] def sum_cc_str(self, ccs): """ Returns the sum of multiple carbon counts and unsaturations. Accepts string format and results string format. :param str ccs: Multiple carbon counts and unsaturations in string representation, e.g. `18:1/16:0`. """ return self.sum_cc(self.ccs2int(ccs))
[docs] def sum_cc(self, ccs): """ Adds numeric carbon counts and unsaturations. Accepts a list of tuples of integers, returns a tuple of integers. :param list ccs: A list with tuples of integers, e.g. `[(14, 1), (16, 0)]`. """ return ( tuple( reduce( lambda cu1, cu2: # here `cu`: carbon count and unsaturation (cu1[0] + cu2[0], cu1[1] + cu2[1]), ccs ) ) )
[docs] def sum_cc2(self, ccs): """ Returns the total carbon count and unsaturation in tuple format from a list of tuples where the first element of the tuple is another tuple with the cc and uns, and the second is the intensity, which is discarded here. :param list ccs: List of the format described above. E.g. `[((16, 0), 1000.0), ((18, 1), 722)]`, this results `(34, 1)`. """ return self.sum_cc(map(lambda cci: cci[0], ccs))
[docs] def sum_cc2str(self, ccs): """ Returns the total carbon count and unsaturation in string format from a list of tuples where the first element of the tuple is another tuple with the cc and uns, and the second is the intensity, which is discarded here. :param list ccs: List of the format described above. E.g. `[((16, 0), 1000.0), ((18, 1), 722)]`, this results `34:1`. """ return self.cc2str(self.sum_cc2(ccs))
[docs] def add_fa1(self, fa, hg): if hg not in self.fa1: self.fa1[hg] = set([]) self.fa1[hg].add( tuple( map( lambda fai: fai[0], fa ) ) ) fastr = ', '.join( map( lambda fai: self.cc2str(fai[0]), fa ) ) self.feature.msg('\t\t -- Adding fatty acids %s at headgroup '\ '%s\n' % (fastr, hg))
[docs] def fa_ccs_agree_ms1(self, hg, fa_type = None, head = 2): fa_cc = self.most_abundant_fa_cc(fa_type = fa_type, head = head) if len(fa_cc) > 0: cc = self.sum_cc2str([fa_cc[0]] * 2) agr = self.fa_cc_agrees_ms1(cc, hg) if agr: self.add_fa1(fa_cc[:1], hg) if len(fa_cc) > 1: cc = self.sum_cc2str(fa_cc[:2]) agr = self.fa_cc_agrees_ms1(cc, hg) if agr: self.add_fa1(fa_cc[:2], hg) return hg in self.fa
[docs] def fa_cc_agrees_ms1(self, cc, hg): result = False if hg in self.feature.ms1fa and cc in self.feature.ms1fa[hg]: if hg not in self.feature.fa: self.feature.fa[hg] = set([]) if hg not in self.fa: self.fa[hg] = set([]) self.feature.fa[hg].add(cc) self.fa[hg].add(cc) result = True self.feature.msg('\t\t -- Carbon count from MS2: %s; from databases '\ 'lookup: %s -- Any of these matches: %s\n' % \ ( cc, str(self.feature.ms1fa[hg]) \ if hg in self.feature.ms1fa else '-', str(result)) ) return result
[docs] def frag_name_present(self, name): return name in self.scan[:,7]
#### New methods
[docs] def cer1p_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Ceramide-1-phosphate. **Specimen:** - GLTPD1 - 616.47 **Principle:** - The most abundant fragment is 78.9591 metaphosphate. - If 96.9696 phosphate present adds to the score. """ score = 0 fattya = set([]) if self.most_abundant_mz_is(78.95905658): score += 5 if self.has_mz(96.96962158): score += 1 return {'score': score, 'fattya': fattya}
[docs] def hexcer_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Hexosyl-Ceramide. **Specimen:** - GLTP - 744.5627 **Principle:** - Hexose fragments 71.0115, 89.0220 and 101.0219 must present. """ score = 0 fattya = set([]) if all(map(lambda mz: self.mz_among_most_abundant(mz, n = 10), # these are 3 fragments found at GLTP [71.0115000, 89.0220000, 101.021900])): score += 5 return {'score': score, 'fattya': fattya}
[docs] def hexceroh_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Hexosyl-Ceramide-OH ('t'). This method is the same as `hexcer_neg_1`. **Specimen:** - GLTP - 760.557 **Principle:** - Hexose fragments 71.0115, 89.0220 and 101.0219 must present. """ return self.hexcer_neg_1()
[docs] def hexcer_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Hexosyl-Ceramide. **Specimen:** - GLTP + 810.68 **Principle:** - Hexose fragments 198.0740, 180.0634 and 162.0528 must present. """ score = 0 fattya = set([]) hexfrags = sum(map(lambda nl: self.nl_among_most_abundant(nl, n = 15), [198.073955, 180.06339, 162.052825])) if hexfrags: score += hexfrags + 4 if score: fattya.update(self.cer_missing_fa('HexCer')) return {'score': score, 'fattya': fattya}
[docs] def hexceroh_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Hexosyl-Ceramide-OH (`t`). This method is the same as `hexcer_pos_1`. **Specimen:** - GLTP + 826.67 **Principle:** - Hexose fragments 198.0740, 180.0634 and 162.0528 must present. """ return self.hexcer_pos_1()
[docs] def cer1p_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Ceramide-1-phosphate. **Specimen:** - GLTPD1 + 728.59 **Principle:** - A shpingosine backbone with 2 H2O loss must be among the 3 highest intensity fragments. - Presence of any of the following fragments increases the score: 82.0651, 107.0729, 135.1043, 149.1199. """ score = 0 fattya = set([]) if self.fa_among_most_abundant('-H2O-H2O+]+', n = 3, sphingo = True): score += 4 if any(map(self.has_mz, # these present at Cer too # a specific difference needed! [82.0651257, 107.072951, 135.104251, 149.119901])): score += 1 fattya.update(self.cer_missing_fa('Cer1P')) return {'score': score, 'fattya': fattya}
[docs] def dag_pos_1(self): """ Examines if a positive mode MS2 spectrum is a DAG. **Specimen:** - SEC14L2 + 584.52 - in vitro: BNIP2 + 770.67 **Principle:** - Combination of fatty acid fragments among the 10 most abundant fragments must match the expected carbon count and unsaturation. - If these are among the 5 highest fragments the score is higher. """ score = 0 fattya = set([]) if(self.fa_combinations('DAG', head = 10)): score += 4 if(self.fa_combinations('DAG', head = 6)): score += 2 fattya.update(self.fa_combinations('DAG')) return {'score': score, 'fattya': fattya}
[docs] def dag_neg_1(self): """ Examines if a negative mode MS2 spectrum is a DAG. **Specimen:** - We don't have yet. **Principle:** - Combination of fatty acid fragments among the 10 most abundant fragments must match the expected carbon count and unsaturation. - If these are among the 5 highest fragments the score is higher. """ score = 0 fattya = set([]) if(self.fa_combinations('DAG', head = 10)): score += 4 if(self.fa_combinations('DAG', head = 6)): score += 2 fattya.update(self.fa_combinations('DAG')) return {'score': score, 'fattya': fattya}
[docs] def tag_neg_1(self): """ Examines if a negative mode MS2 spectrum is a TAG. **Specimen:** - We don't have yet. **Principle:** - Combination of fatty acid fragments must match the expected carbon count and unsaturation. """ score = 0 fattya = set([]) fattya.update(self.fa_combinations3('TAG')) if fattya: score += 5 return {'score': score, 'fattya': fattya}
[docs] def tag_pos_1(self): """ Examines if a positive mode MS2 spectrum is a TAG. **Specimen:** - STARD11 + 818.7187 **Principle:** - Combination of fatty acid fragments must match the expected carbon count and unsaturation. """ score = 0 fattya = set([]) fattya.update(self.fa_combinations3('TAG')) if fattya: score += 5 return {'score': score, 'fattya': fattya}
[docs] def pi_pos_1(self): """ Examines if a negative MS2 spectrum is Phosphatidylinositol. **Specimen:** - SEC14L2 + 906.60 and 882.6 **Principle:** - Combinations of fatty acid fragments must match the expected carbon count and unsaturation for PI. - Presence of neutral losses 259.0219 and 277.0563 adds to the score. """ score = 0 fattya = set([]) fattya.update(self.fa_combinations('PI')) if fattya: score += 1 if self.has_nl(259.021894): score += 4 if self.has_nl(277.056272): score += 4 return {'score': score, 'fattya': fattya}
[docs] def ps_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Phosphatidylserine. **Specimen:** - BPI + 790.56 **Principle:** - PS headgroup neutral loss 185.0089 must be the highest intensity. """ score = 0 fattya = set([]) if self.nl_among_most_abundant(185.008927, 1): score += 5 fattya.update(self.fa_combinations('PS')) return {'score': score, 'fattya': fattya}
[docs] def bmp_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Bismonoacylglycerophosphate. **Specimen:** - BPIFB2 + 792.57 **Principle:** - A glycerol+fatty acid fragment can be found among the 3 highest? - The PG headgroup neutral loss (189.0402) is among the fragments? - If so, does it have a lower intensity than half of the fatty acid+glycerol fragment? """ score = 0 fattya = set([]) if self.fa_among_most_abundant('+G(', 3): fattya.update(self.fa_combinations('BMP')) if fattya: score += 4 hg_int = self.get_nl_intensity(189.0402) if hg_int: gfa_highest = self.get_most_abundant_fa('+G(', head = 4) if gfa_highest[1] < hg_int * 2: score = 0 fattya = set([]) return {'score': score, 'fattya': fattya}
[docs] def pg_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Phosphatidylglycerol. At in vivo observed only in standard. **Principle:** - The PG headgroup neutral loss (189.0402) is the fragment ion with the highest intensity? """ score = 0 fattya = set([]) if self.nl_among_most_abundant(189.0402, 1): score += 5 fattya.update(self.fa_combinations('PG')) if fattya: score += 4 return {'score': score, 'fattya': fattya}
[docs] def va_pos_1(self): """ Examines if a positive MS2 spectrum is vitamin A (retinol). **Specimen:** - RBP1 + 269.2245 - RBP4 + 269.2245 **Principle:** - The most abundant ion is the whole molecule m/z = 269.224. - Presence off 3 other ions adds to the score but not mandatory: 213.165, 145.1027, 157.1028. """ score = 0 fattya = set([]) if self.mz_among_most_abundant(269.224, 3): score += 5 score += sum(map(self.has_mz, [213.165, 145.1027, 157.1028])) return {'score': score, 'fattya': fattya}
[docs] def bmp_neg_1(self): """ Examines if a negative mode MS2 spectrum is Phosphatidylglycerol. The result will be the same as `bmp_neg_1`, as in negative mode we do not know a way to distinguish these species. **Specimen:** - GM2A - 799.54 - BPIFB2 - 773.5258 (might be BMP) **Principle:** - The most abundant fragment is a fatty acid [M-H]- ion. - The 152.9958 glycerophosphate fragment must be present. - If Lyso-PG fragment present with carbon count complementing the [M-H]- fatty acid score is higher. - Presence of 171.0064 headgroup fragment adds to the score. """ return self.pg_neg_1()
#### End: new methods
[docs] def pe_neg_1(self): """ Examines if a negative mode MS2 spectrum is Phosphatidylethanolamine. **Specimen:** - GM2A - 714.507 **Principle:** - The most abundant fragment is a fatty acid [M-H]- ion. - 140.0118 PE headgroup must be present. - Other headgroup ions 196.0380 and 178.0275 add to the score. - Lyso-PE and [M-H-CO2]- fatty acid fragments complementing the highest [M-H]- fatty acid increase the score. """ score = 0 fattya = set([]) if ( self.is_fa(0) and self.fa_type_is(0, '-H]-') and self.has_mz(140.0118206) and not self.lysope_neg_1()['score'] ): score += 5 fattya = self.fa_combinations('PE') if self.has_mz(196.0380330): score += 1 if self.has_mz(178.0274684): score += 1 fa_h_ccs = self.matching_fa_frags_of_type('PE', '-H]-') for fa_h_cc in fa_h_ccs: for fa_other in [ '[Lyso-PE(C%u:%u)-]-', '[Lyso-PE-alkyl(C%u:%u)-H2O]-', '[Lyso-PE-alkyl(C%u:%u)-]-', '[FA(C%u:%u)-H-CO2]-' ]: if self.frag_name_present(fa_other % fa_h_cc): score += 1 return {'score': score, 'fattya': fattya}
[docs] def lysope_neg_1(self): """ Examines if a negative mode MS2 spectrum is Lysophosphatidylethanolamine. **Specimen:** - in vitro FABP1 - 464.27 **Principle:** - The most abundant fragment is a fatty acid [M-H]- ion. - 140.0118 PE headgroup must be present. - The carbon count and unsaturation of the highest fatty acid fragment must be the same as it is expected for the whole PE molecule. - Other headgroup ions 196.0380 and 178.0275 add to the score. """ score = 0 fattya = set([]) if ( self.is_fa(0) and self.fa_type_is(0, '-H]-') and self.has_mz(140.0118206) ): score += 5 if self.has_mz(196.0380330): score +=1 if self.has_mz(178.0274684): score += 1 ccs = self.ms1_cc(['PE', 'LysoPE']) for cc in ccs: if len(self.fa_list) and self.fa_list[0][0] == self.cc2int(cc): score += 3 fattya.add(cc) if not fattya: score = 0 return {'score': score, 'fattya': fattya}
[docs] def pc_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Phosphatidylcholine. **Specimen:** - BPI - 804.57 **Principle:** - 168.0431 phosphate+choline-CH3 fragment must be present. - The highest abundant fragment must be a fatty acid [M-H]- fragment. - Lyso-PC fragments complementing the highest [M-H]- fatty acid increase the score. """ score = 0 fattya = set([]) if self.is_fa(0) and self.fa_type_is(0, '-H]-') and self.has_mz(168.0431206): score += 5 fattya = self.fa_combinations('PC') fa_h_ccs = self.matching_fa_frags_of_type('PC', '-H]-') for fa_h_cc in fa_h_ccs: if self.frag_name_present('[Lyso-PC(c%u:%u)-]-' % fa_h_cc): score += 1 return {'score': score, 'fattya': fattya}
[docs] def pi_neg_1(self): """ Examines if a negative MS2 spectrum is Phosphatidylinositol. **Specimen:** - GM2A - 835.52 **Principle:** - Inositolphosphate-H2O fragment 241.0119, metaphosphate 78.9591 and headgroup fragment 152.9958 must be present. - Additional headgroup fragments 96.9696, 259.0224 and 297.0381 increase the score. - Presence of Lyso-PI fragments complementing other [M-H]- fatty acid fragments increase the score. """ score = 0 fattya = set([]) if self.has_mz(241.0118779) and self.has_mz(152.9958366) and \ self.has_mz(78.95905658): score += 5 fattya = self.fa_combinations('PI') for hgfrag_mz in [96.96962158, 259.0224425, 297.0380926]: if self.has_mz(hgfrag_mz): score += 1 fa_h_ccs = self.matching_fa_frags_of_type('PI', '-H]-') for fa_h_cc in fa_h_ccs: for fa_other in [ '[Lyso-PI(C%u:%u)-]-', '[Lyso-PI(C%u:%u)-H2O]-]']: if self.frag_name_present(fa_other % fa_h_cc): score += 1 return {'score': score, 'fattya': fattya}
[docs] def ps_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Phosphatidylserine. **Specimen:** - ORP9 - 788.54 **Principle:** - The most abundant fragment is an [M-H]- fatty acid fragment. - Glycerophosphate fragment 152.9958 must be present. - Metaphosphate 78.9591 increases the score. - Serine-H2O neutral loss 87.0320 adds to the score. - Presence of Lyso-PS and Lyso-PA fragments complementing the highest [M-H]- fatty acid fragment increase the score. """ score = 0 fattya = set([]) if self.is_fa(0) and self.fa_type_is(0, '-H]-') and \ self.mz_among_most_abundant(152.9958366, 5): score += 5 fattya = self.fa_combinations('PS') if not fattya: score = 0 return {'score': score, 'fattya': fattya} if self.has_mz(87.03202840): score += 1 if self.has_mz(78.95905658): score += 1 fa_h_ccs = self.matching_fa_frags_of_type('PS', '-H]-') for fa_h_cc in fa_h_ccs: for fa_other in [ '[Lyso-PS(C%u:%u)-]-', '[Lyso-PA(C%u:%u)-]-']: if self.frag_name_present(fa_other % fa_h_cc): score += 1 return {'score': score, 'fattya': fattya}
[docs] def pg_neg_1(self): """ Examines if a negative mode MS2 spectrum is Phosphatidylglycerol. The result will be the same as `bmp_neg_1`, as in negative mode we do not know a way to distinguish these species. **Specimen:** - GM2A - 799.54 - BPIFB2 - 773.5258 (might be BMP) **Principle:** - The most abundant fragment is a fatty acid [M-H]- ion. - The 152.9958 glycerophosphate fragment must be present. - If Lyso-PG fragment present with carbon count complementing the [M-H]- fatty acid score is higher. - Presence of 171.0064 headgroup fragment adds to the score. """ score = 0 fattya = set([]) if self.is_fa(0) and self.fa_type_is(0, '-H]-') and \ self.has_mz(152.9958366): score += 5 #if self.mz_among_most_abundant(152.9958366, 5): # score -= 3 fattya = self.fa_combinations('PG') if self.has_mz(171.0064016): score += 1 fa_h_ccs = self.matching_fa_frags_of_type('PG', '-H]-') for fa_h_cc in fa_h_ccs: for fa_other in [ 'Lyso-PG(C%u:%u)-]-', 'Lyso-PG(C%u:%u)-H2O]-']: if self.frag_name_present(fa_other % fa_h_cc): score += 1 return {'score': score, 'fattya': fattya}
[docs] def sm_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Sphingomyeline. **Specimen:** - GLTPD1 - 745.55 **Principle:** - Must have a neutral loss of CH3+COOH (60.0211). - Phosphate+choline-CH3 fragment 168.0431 must be present. """ score = 0 fattya = set([]) if self.mz_among_most_abundant(168.0431206) and self.has_nl(60.02113): score += 5 return {'score': score, 'fattya': fattya}
[docs] def sph1p_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Spingosine-1-phosphate. **Specimen:** - Only observed in standard. **Principle:** - Phosphate 78.9590 must be present. """ score = 0 fattya = set([]) if self.has_mz(78.95905658): score += 5 return {'score': score, 'fattya': fattya}
[docs] def cer_neg_1(self): """ Examines if a negative mode MS2 spectrum is Ceramide. **Specimen:** - SEC14L1 - 582.509 **Principle:** - A Ceramide backbone fragment must be among the 2 most abundant. - Ceramide backbone fragments lighter by N or C2N but same carbon count and unsaturation add to the score. """ score = 0 fattya = set([]) if self.fa_among_most_abundant('CerFA', n = 2): score += 5 fattya = self.fa_combinations('Cer', sphingo = True) fa_h_ccs = self.matching_fa_frags_of_type('Cer', 'CerFA(') for fa_h_cc in fa_h_ccs: for fa_other in [ '[CerFA-N(C%u:%u)-]-', '[CerFA-C2N(C%u:%u)-]-']: if self.frag_name_present(fa_other % fa_h_cc): score += 1 return {'score': score, 'fattya': fattya}
[docs] def cerp_neg_1(self): """ Examines if a negative mode MS2 spectrum is a Ceramide-1-phosphate. Gives similar result as Sphingosine-1-phosphate. **Specimen:** - GLTPD1 - 616.47 **Principle:** - The most abundant fragment must be 78.9591 metaphosphate. - Presence of 96.9696 phosphate increase the score. """ score = 0 fattya = set([]) if self.most_abundant_mz_is(78.95905658): score += 5 if self.has_mz(96.96962158): score += 1 return {'score': score, 'fattya': fattya}
[docs] def pc_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Phosphatidylcholine. **Specimen:** - BPI + 786.607 **Principle:** - The most abundant fragment must be choline+phosphate 184.0733. - The 86.0964 ethyl-triethylammonium must be present. - The most abundant fatty acid can not have the same carbon count and unsaturation as the whole molecule (then it is Lyso-PC). - Fragments 104.1069, 124.9998, 60.0808 and 58.0651 increase the score. """ score = 0 fattya = set([]) if ( self.most_abundant_mz_is(184.073323) and self.has_mz(86.096425) and not self.lysopc_pos_1()['score'] ): score += 5 fattya = self.fa_combinations('PC') if self.has_mz(104.106990): score += 1 if self.has_mz(124.999822): score += 1 if self.has_mz(60.080776): score +=1 if self.has_mz(58.065126): score += 1 return {'score': score, 'fattya': fattya}
[docs] def lysopc_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Lysophosphatidylcholine. **Specimen:** - in vitro FABP1 + 522.36 **Principle:** - Choline-phosphate 184.0733, ethyl-triethylammonium 86.0964 and neutral loss 183.0660 must be present. - The latter neutral loss corresponds to a fatty acid+glycerol ion. - The carbon count and unsaturation of this fragment should match that of the whole molecule. """ score = 0 fattya = set([]) if ( self.most_abundant_mz_is(184.073323) and self.has_mz(86.096425) and self.has_nl(183.066045) ): score += 5 fa_mz = self.scan[self.nl_lookup(183.066045),1] ccs = self.ms1_cc(['PC', 'LysoPC']) for cc in ccs: for fa_frag in self.fa_list: if ( fa_frag[0] == self.cc2int(cc) and abs(self.scan[fa_frag[5],1] - fa_mz) < 0.0001 and 'FA+G(' in self.scan[fa_frag[5],7] and self.cc2int(cc)[0] < 21 ): score += 5 fattya.add(cc) if not fattya: score = 0 return {'score': score, 'fattya': fattya}
[docs] def sm_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Sphingomyeline. **Specimen:** - GLTPD1 + 703.57 - GLTPD1 + 813.68 (in vitro) **Principle:** - The following choline fragments must be present: 60.0808, 86.0964, 104.1069, 124.9998 and 184.0733. The last one is the most intensive. - If 58.0651 can be found it adds to the score. """ score = 0 fattya = set([]) if all( map( lambda mz: self.has_mz(mz), [ 60.080776, 86.096425, # 104.106990, # 124.999822, # 184.073323 # ] ) ): score += 5 if self.has_mz(58.0651): score += 1 return {'score': score, 'fattya': fattya}
[docs] def fa_pos_1(self): """ Examines if a positive mode MS2 spectrum is a fatty acid. Here we only check if the most abundant fragment is the fatty acid itself. **Specimen:** - in vitro FABP1 + **Principle:** - The most abundant fragment must be a fatty acid which matches the carbon count and the unsaturation of the whole molecule. """ score = 0 fattya = set([]) self.build_fa_list() if self.is_fa(0): if 'FA' in self.feature.ms1fa: for cc in self.feature.ms1fa['FA']: if len(self.fa_list) and self.cc2int(cc) == self.fa_list[0][0]: score += 5 fattya.add(cc) return {'score': score, 'fattya': fattya}
[docs] def fa_neg_1(self): """ Examines if a negative mode MS2 spectrum is a fatty acid. Here we only check if the most abundant fragment is the fatty acid itself. **Specimen:** - in vitro FABP1 - **Principle:** - The most abundant fragment must be a fatty acid which matches the carbon count and the unsaturation of the whole molecule. """ # these are the same return self.fa_pos_1()
[docs] def cerp_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Ceramide-1-phosphate. **Specimen:** - GLTPD1 + 728.59, 590.45, 702.58, 618.430, 616.415, 640.409 **Principle:** - A sphingosine fragment with double H2O loss must be among the three highest abundant fragments. """ score = 0 fattya = set([]) if self.fa_among_most_abundant('-H2O-H2O+]+', n = 3, sphingo = True): score += 1 return {'score': score, 'fattya': fattya}
[docs] def pe_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Phosphatidylethanolamine. **Specimen:** - BPI + 718.536 **Principle:** - The PE headgroup neutral loss 141.0191 has the highest intensity. - If it is a Lyso-PE score will be zero. """ score = 0 fattya = set([]) if self.nl_among_most_abundant(141.019097, 1): score += 5 fattya = self.fa_combinations('PE') if not fattya and self.lysope_pos_1()['score']: score = 0 return {'score': score, 'fattya': fattya}
[docs] def lysope_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Lysophosphatidylethanolamine. **Specimen:** - in vitro FABP1 + 454.29 **Principle:** - The PE headgroup neutral loss 141.0191 has the highest intensity. - A fatty acid-glycerol fragment should match the carbon count and unsaturation of the whole molecule. """ score = 0 fattya = set([]) if self.nl_among_most_abundant(141.019097, 2): score += 6 if len(self.fa_list): frag1 = self.fa_list[0] ccs = self.ms1_cc(['PE', 'LysoPE']) for cc in ccs: if (frag1[0] == self.cc2int(cc) and self.fa_type_is(frag1[5], 'FA+G(')): score += 1 fattya.add(cc) else: score -= 1 else: score -= 1 return {'score': score, 'fattya': fattya}
[docs] def cer_pos_1(self): """ Examines if a positive mode MS2 spectrum is a Ceramide. **Specimen:** - SEC14L1 + 538.52 - STARD11 + 538.526 **Principle:** - A sphingosine backbone with two H2O loss must be among the 10 most abundant fragments. - Fatty acid [M+H]+ or [M-O]+ fragments or neutral losses complementing the one above increase the score. - Sphingosine backbone fragments with same carbon count and unsaturation with the one with 2 water loss but [Sph-C-2(H2O)]+ or [Sph-H2O]+ add to the score. - The score increases if the following choline fragments can not be found: 58.0651, 104.1070, 124.9998 and 184.0733. - The presence of the following fragments increase the score: 60.0444, 70.0651, 82.0651, 96.0808, 107.0730, 121.0886, 135.1042 and 149.1199. """ score = 0 fattya = set([]) if 'Cer' not in self.feature.ms1fa: ms1uns = None else: # larger unsaturation than the whole molecule # does not make sense ms1uns = max(map(lambda _cc: self.cc2int(_cc)[1], self.feature.ms1fa['Cer'])) if self.fa_among_most_abundant('-H2O-H2O+]+', n = 10, sphingo = True, uns = ms1uns): score += 5 fattya = self.fa_combinations('Cer', sphingo = True) sph_ccs, fa_frags = self.matching_fa_frags_of_type('Cer', '-H2O-H2O+]+', sphingo = True, return_details = True) for cc, fa_frag_names in iteritems(fa_frags): for fa_frag_name in fa_frag_names: if '+H]+' in fa_frag_name: score += 1 if '-O]+' in fa_frag_name: score += 1 if 'NL' in fa_frag_name: score += 1 for sph_cc in sph_ccs: for fa_other in [ '[Sphingosine(C%u:%u)-C-H2O-H2O+]+', '[Sphingosine(C%u:%u)-H2O+]+']: if self.frag_name_present(fa_other % sph_cc): score += 1 if not len( list( filter( lambda mz: self.has_mz(mz), [58.065126, 104.106990, 124.999822, 184.073323] ) ) ): score += 1 score += len( list( filter( lambda mz: self.has_mz(mz), [60.0443902, 70.0651257, 82.0651257, 96.0807757, 107.072951, 121.088601, 135.104251, 149.119901] ) ) ) return {'score': score, 'fattya': fattya}
[docs] def vd_pos_1(self): """ Examines if a positive mode MS2 spectrum is a vitamin D. This method is not implemented, does nothing. """ score = 0 fattya = set([]) return {'score': score, 'fattya': fattya}
[docs] def ms1_cc(self, hgs): """ For a list of headgroups returns the possible carbon counts based on database lookups of MS1 m/z's. Returns set of strings. """ ccs = set([]) for hg in hgs: if hg in self.feature.ms1fa: ccs.update(self.feature.ms1fa[hg]) if hg in self.feature.ms1fa: ccs.update(self.feature.ms1fa[hg]) return ccs
[docs] def is_pe(self): if self.feature.mode == 'pos': return self.pa_pe_ps_pg_pos('PE') else: return self.pe_pc_pg_neg('PE')
[docs] def is_pc(self): if self.feature.mode == 'pos': return self.pc_pos('PC') else: return self.pe_pc_pg_neg('PC')
[docs] def is_pa(self): if self.feature.mode == 'pos': return self.pa_pe_ps_pg_pos('PA') else: return self.pa_ps_neg('PA')
[docs] def is_ps(self): if self.feature.mode == 'pos': return self.pa_pe_ps_pg_pos('PS') else: return self.pa_ps_neg('PS')
[docs] def is_pg(self): if self.feature.mode == 'pos': return self.pa_pe_ps_pg_pos('PG') else: return self.pe_pc_pg_neg('PG')
[docs] def pa_pe_ps_pg_pos(self, hg): return self.mz_among_most_abundant(141.0191) \ and self.fa_among_most_abundant('-O]+', min_mass = 140.0) \ and self.fa_ccs_agree_ms1(hg, '-O]+')
[docs] def pa_ps_neg(self, hg): return self.has_mz(152.9958366) and self.has_mz(78.95905658) \ and self.most_abundant_fa('-H]-') \ and self.fa_ccs_agree_ms1(hg, '-H]-')
[docs] def pe_pc_pg_neg(self, hg): return self.most_abundant_fa('-H]-') \ and self.fa_ccs_agree_ms1(hg, '-H]-')
[docs] def pc_pos(self, hg): return self.mz_most_abundant_fold(184.0733, 3) \ and self.fa_ccs_agree_ms1(hg, head = 4)