Source code for lipyd.ms2

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
#  This file is part of the `lipyd` python module
#
#  Copyright (c) 2015-2018 - EMBL
#
#  File author(s): Dénes Türei (turei.denes@gmail.com)
#
#  Distributed under the GNU GPLv3 License.
#  See accompanying file LICENSE.txt or copy at
#      http://www.gnu.org/licenses/gpl-3.0.html
#
#  Website: http://www.ebi.ac.uk/~denes
#

from __future__ import print_function
from future.utils import iteritems
from past.builtins import xrange, range, reduce

import sys
import imp
import re
import math
import copy
import itertools
import collections
from argparse import Namespace
import numpy as np

from lipyd.common import *
import lipyd.mgf as mgf
import lipyd.mz as mzmod
import lipyd.session as session
import lipyd.settings as settings
import lipyd.lookup as lookup
import lipyd.fragdb as fragdb
import lipyd.moldb as moldb
import lipyd.lipproc as lipproc


ChainFragment = collections.namedtuple(
    'ChainFragment',
    ['c', 'u', 'fragtype', 'chaintype', 'i', 'intensity']
)


[docs]class MS2Identity(collections.namedtuple(
        'MS2IdentityBase',
        [
            'score', 'max_score', 'score_pct',
            'hg', 'chainsum', 'chains', 'chain_details',
            'scan_details',
        ]
    )):
    
    def __new__(
            cls,
            score = 0,
            max_score = 0,
            score_pct = 0,
            hg = None,
            chainsum = None,
            chains = None,
            chain_details = None,
            scan_details = None,
        ):
        
        return super(MS2Identity, cls).__new__(
            cls,
            score,
            max_score,
            score_pct,
            hg,
            chainsum = chainsum,
            chains = chains,
            chain_details = chain_details,
            scan_details = scan_details,
        )
    
    def __str__(self):
        
        return (
            lipproc.full_str(self.hg, self.chains)
                if self.chains else
            lipproc.summary_str(self.hg, self.chainsum)
        )
    
[docs]    def full_str(self):
        
        details = []
        
        details.append('score=%.01f' % self.score_pct)
        
        if self.scan_details:
            
            if self.scan_details.deltart is not None:
                details.append('deltart=%.02f' % self.scan_details.deltart)
            if self.scan_details.sample_id is not None:
                sample_id = self.scan_details.sample_id
                if isinstance(sample_id, tuple):
                    sample_id = ''.join(str(i) for i in sample_id)
                details.append('sample=%s' % sample_id)
            if self.scan_details.scan_id is not None:
                details.append('scan=%u' % self.scan_details.scan_id)
        
        return '%s[%s]' % (
            self.__str__(),
            ','.join(details),
        )
    
[docs]    def summary(self):
        
        return self.__str__(), self.score_pct
    
    def __eq__(self, other):
        
        return (
            isinstance(other, MS2Identity) and
            self.hg == other.hg and
            self.chainsum == other.chainsum and
            self.chains == other.chains
        )


ChainIdentificationDetails = collections.namedtuple(
    'ChainIdentificationDetails',
    ['rank', 'i', 'fragtype']
)
ChainIdentificationDetails.__new__.__defaults__ = (None, None, None)


ScanDetails = collections.namedtuple(
    'ScanDetails',
    ['sample_id', 'scan_id', 'source', 'deltart']
)
ChainIdentificationDetails.__new__.__defaults__ = (None, None, None, None)


[docs]class mz_sorted(object):
    
    def __init__(self, scan):
        
        self.scan = scan
    
    def __enter__(self):
        
        self.scan.sort_mz()
    
    def __exit__(self, extyp, exval, tb):
        
        self.scan.sort_intensity()


[docs]class intensity_sorted(object):
    
    def __init__(self, scan):
        
        self.scan = scan
    
    def __enter__(self):
        
        self.scan.sort_intensity()
    
    def __exit__(self, extyp, exval, tb):
        
        self.scan.sort_mz()


[docs]class ScanBase(object):
    
    def __init__(
            self,
            mzs,
            ionmode,
            precursor = None,
            intensities = None,
            tolerance = None,
            scan_id = None,
        ):
        
        self.tolerance = tolerance or settings.get('ms2_tolerance')
        self.sorted_by = None
        self.mzs = mzs
        self.ionmode = ionmode
        self.adducts = {}
        self.intensities = (
            np.array([1.0] * len(self.mzs))
                if intensities is None else
            intensities
        )
        self.precursor = precursor
        self.scan_id = scan_id
        
        if self.mzs is not np.ndarray:
            
            self.mzs = np.array(self.mzs)
        
        if self.intensities is not np.ndarray:
            
            self.intensities = np.array(self.intensities)
        
        self.annotate()
        self.normalize_intensities()
        
        with mz_sorted(self):
            
            self.iisort = np.argsort(self.intensities)[::-1]
        
        self.irank = np.arange(len(self.mzs))
        self.imzsort  = np.argsort(self.mzs)
        self.sorted_by = 'intensities'
    
[docs]    def reload(self):
        
        modname = self.__class__.__module__
        mod = __import__(modname, fromlist = [modname.split('.')[0]])
        imp.reload(mod)
        new = getattr(mod, self.__class__.__name__)
        setattr(self, '__class__', new)
    
    def __len__(self):
        
        return len(self.mzs)
    
[docs]    def sort_mz(self):
        """
        Sorts the scan by m/z values ascending.
        """
        
        if self.sorted_by == 'mzs':
            
            return
            
        elif hasattr(self, 'imzsort') and self.sorted_by == 'intensities':
            
            self.sort(self.imzsort)
            
        else:
            
            isort = np.argsort(self.mzs)
            self.sort(isort)
        
        self.sorted_by = 'mzs'
    
[docs]    def sort_intensity(self):
        """
        Sorts the scan by intensities descending.
        """
        
        if self.sorted_by == 'intensities':
            
            return
            
        elif hasattr(self, 'iisort') and self.sorted_by == 'mzs':
            
            self.sort(self.iisort)
            
        else:
            
            isort = np.argsort(self.intensities)[::-1]
            self.sort(isort)
        
        self.sorted_by = 'intensities'
    
[docs]    def sort(self, isort):
        """
        Applies sorted indices to the scan.
        """
        
        self.intensities = self.intensities[isort]
        self.mzs = self.mzs[isort]
        
        for attr in ('irank', 'annot', 'inorm'):
            
            if hasattr(self, attr):
                
                setattr(self, attr, getattr(self, attr)[isort])
        
        for ad, data in iteritems(self.adducts):
            
            data['annot'] = data['annot'][isort]
    
[docs]    def annotate(self):
        """
        Annotates the fragments in the scan with identities provided by
        the fragment database.
        """
        
        self.annot = self.get_annot()
    
[docs]    def get_annot(self, precursor = None, tolerance = None):
        """
        Returns array of annotations.
        Makes it possible to use different precursor or tolerance.
        """
        
        precursor = precursor or self.precursor
        tolerance = tolerance or self.tolerance
        
        annotator = fragdb.FragmentAnnotator(
            self.mzs,
            self.ionmode,
            precursor,
            tolerance = tolerance,
        )
        
        return np.array(list(annotator)) # this is array
                                         # only to be sortable
    
[docs]    def normalize_intensities(self):
        """
        Creates a vector of normalized intensities i.e. divides intensities
        by their maximum.
        """
        
        self.imax  = self.intensities.max()
        self.inorm = self.intensities / self.imax


[docs]class Scan(ScanBase):
    
    method_hg = {
        'fa_neg_1': ('FA', ()),
    }
    
    def __init__(
            self,
            mzs,
            ionmode,
            precursor = None,
            intensities = None,
            ms1_records = None,
            scan_id = None,
            sample_id = None,
            source = None,
            deltart = None,
            logger = None,
            verbose = False,
            tolerance = None,
            ms1_tolerance = None,
            rt = None,
        ):
        
        ScanBase.__init__(
            self,
            mzs,
            ionmode,
            precursor,
            intensities,
            tolerance = tolerance,
        )
        
        # get some settings
        self.ms1_tolerance = ms1_tolerance or settings.get('ms1_tolerance')
        self.check_ratio_g = settings.get(
            'even_chain_fragment_intensity_ratios_gl_gpl'
        )
        self.check_ratio_s = settings.get(
            'even_chain_fragment_intensity_ratios_sl'
        )
        self.iratio_logbase = settings.get(
            'chain_fragment_instensity_ratios_logbase'
        )
        self.chain_details = settings.get('ms2_scan_chain_details')
        
        if ms1_records is None and precursor is not None:
            
            # do the database lookup if not provided,
            # this is not efficient but makes possible
            # to easily use standalone `Scan` instances
            # for testing and such
            self.ms1_records = moldb.adduct_lookup(
                precursor, ionmode, tolerance = self.ms1_tolerance
            )
            
        else:
            
            # even if precursor is None, we end up with an empty dict
            self.ms1_records = ms1_records or {}
        
        self.scan_id   = scan_id
        self.sample_id = sample_id
        self.source    = source
        self.deltart   = deltart
        self.rt        = rt
        self.log       = logger
        self.verbose   = verbose
        
        self.scan_details = ScanDetails(
            sample_id = self.sample_id,
            scan_id   = self.scan_id,
            source    = self.source,
            deltart   = self.deltart,
        )
    
[docs]    @classmethod
    def from_mgf(
            cls,
            fname,
            scan_id,
            ionmode,
            sample_id = None,
            precursor = None,
            mgf_charge = None,
            **kwargs
        ):
        
        mgfreader = mgf.MgfReader(fname, charge = mgf_charge)
        sc = mgfreader.scan_by_id(scan_id)
        
        precursor = precursor or mgfreader.precursor_by_id(scan_id)
        
        if sc is not None:
            
            return cls(
                mzs = sc[:,0],
                intensities = sc[:,1],
                ionmode = ionmode,
                precursor = precursor,
                scan_id = scan_id,
                sample_id = sample_id,
                source = fname,
                **kwargs
            )
    
[docs]    def reload(self):
        
        modname = self.__class__.__module__
        mod = __import__(modname, fromlist=[modname.split('.')[0]])
        imp.reload(mod)
        new = getattr(mod, self.__class__.__name__)
        setattr(self, '__class__', new)
    
[docs]    def print_scan(self):
        """
        Prints the list of fragments as an annotated table.
        """
        
        if self.log:
            
            self.log.msg(self.scan_str())
    
[docs]    def show(self):
        """
        Prints the scan table to standard output.
        """
        
        sys.stdout.write(self.scan_str())
    
[docs]    def scan_str(self):
        """
        Returns the scan table as string.
        """
        
        lindent = ' ' * 12
        
        header = '%s\n%s%s\n' % (
            ''.join((
                lindent,
                'Frag. m/z'.ljust(12),
                'Intensity'.ljust(12),
                'Identity'.ljust(36),
                'NL mass'.rjust(12)
            )),
            lindent,
            '=' * 12 * 6
        )
        
        table = '\n'.join((
            ''.join((
                lindent,
                '%12.4f' % self.mz[i],
                '%10u'   % self.intensities[i],
                ann.name,
                (
                    '%12.4f' % self.nl(mz[i])
                        if self.precursor else
                    'NA'.rjust(12)
                )
            ))
            for i in xrange(len(self.mz))
            for ann in (
                self.annot[i]
                    if self.annot[i] else
                (Namespace(name = 'Unknown'),)
            )
        ))
        
        return '%s\n%s\n\n' % (
            self.sample.__str__(),
            header,
            table
        )
    
[docs]    def html_table(self):
        
        # TODO
        pass
    
[docs]    def nl(self, mz, adduct = None):
        """
        For m/z returns the corresponding neutral loss m/z.
        If precursor ion mass is unknown returns `numpy.nan`.
        """
        
        if adduct is None:
            
            return self.precursor - mz if self.precursor else np.nan
            
        else:
            
            self.adduct(adduct)
            
            return self.adducts[adduct]['fake_precursor'] - mz
    
[docs]    def full_list_str(self):
        """
        Returns list of fragments as single string.
        """
        
        return '; '.join(
            '/'.join(
                '%s (%u)' % (
                    ann.name,
                    self.intensities[i]
                )
                    if ann is not None else
                'Unknown (%.03f) (%u)' % (
                    self.mzs[i],
                    self.intensities[i]
                )
            )
            for i in xrange(len(self))
            for ann in (
                self.annot[i]
                    if self.annot[i] else
                (None,)
            )
        )
    
[docs]    def most_abundant_mz(self):
        """
        Returns the m/z of the fragment with highest intensity.
        """
        
        result = self.mzs[0]
        
        if self.verbose:
            
            self.log.msg('\t\t  -- Most abundant m/z is %.03f' % result)
        
        return result
    
[docs]    def mz_match(self, mz_detected, mz):
        """
        Tests if two m/z's can be accepted to be equal.
        """
        
        return lookup.match(mz_detected, mz, self.tolerance)
    
[docs]    def mz_lookup(self, mz):
        """
        Returns the index of the closest m/z value
        detected in the scan if it is within the
        range of tolerance, otherwise None.
        """
        
        self.sort_mz()
        
        imz = lookup.find(self.mzs, mz, self.tolerance)
        i = self.imzsort[imz] if imz else None
        
        self.sort_intensity()
        
        return i
    
[docs]    def has_mz(self, mz):
        """
        Tells if an m/z exists in this scan.
        """
        
        result = self.mz_lookup(mz) is not None
        
        if self.verbose:
            
            self.log.msg(
                '\t\t  -- m/z %.03f occures in this scan? -- %s' % (
                    mz, str(result)
                )
            )
        
        return result
    
[docs]    def has_nl(self, nl, adduct = None):
        """
        Tells if a neutral loss exists in this scan.
        """
        
        result = self.has_mz(self.nl(nl, adduct = adduct))
        
        if self.verbose:
            
            self.feature.msg(
                '\t\t  -- neutral loss of %.03f occures in '
                'this scan? Looked up m/z %.03f - %.03f = %.03f -- %s' % (
                    nl,
                    self.precursor,
                    nl,
                    self.nl(nl, adduct = adduct),
                    str(result)
                )
            )
        
        return result
    
[docs]    def fragment_by_name(self, name, adduct = None):
        """
        Returns the index of a fragment by its name.
        Returns `None` if the fragment does not exist in the scan.
        Returns `False` if the fragment name could not be found in
        the database.
        
        The lookup still goes by m/z, the name first looked up in the
        fragment database and the scan searched for the corresponding m/z.
        The name makes if obvious if this is a charged fragment or a neutral
        loss, hence it is not necessary to provide this information.
        
        Args
        ----
        :param str name:
            Fragment full name as used in the database 2nd column,
            e.g. `PE [P+E] (140.0118)`.
        """
        
        frag = fragdb.by_name(name, self.ionmode)
        
        if frag is not None:
            
            if frag[6] == 0:
                
                return self.nl_lookup(frag[0], adduct = adduct)
                
            else:
                
                return self.mz_lookup(frag[0])
        
        return False
    
[docs]    def has_fragment(self, name, adduct = None):
        """
        Tells if a fragment exists in this scan by its name.
        
        Returns bool or `None` if fragment name could not be found
        in the database.
        """
        
        i = self.fragment_by_name(name, adduct = adduct)
        
        return None if i is False else i is not None
    
[docs]    def nl_lookup(self, nl, adduct = None):
        """
        Looks up if a neutral loss exists in this scan and returns its index.
        """
        
        return self.mz_lookup(self.nl(nl, adduct = adduct))
    
[docs]    def most_abundant_fragment_is(self, name, adduct = None):
        """
        Tells if the fragment name is the highest abundant.
        Returns `None` if the fragment name could not be
        found in the database.
        """
        
        frag = fragdb.by_name(name, self.ionmode)
        
        if frag is not None:
            
            mz = (
                self.nl(frag[0], adduct = adduct)
                    if frag[6] == 0 else
                frag[0]
            )
            
            return self.mz_match(self.mzs[0], mz)
    
[docs]    def fragment_among_most_abundant(self, name, n = 2, adduct = None):
        """
        Tells if the fragment is among the top `n`.
        """
        
        frag = fragdb.by_name(name, self.ionmode)
        
        if frag is not None:
            
            mz = (
                self.nl(frag[0], adduct = adduct)
                    if frag[6] == 0 else
                frag[0]
            )
            
            return self.mz_among_most_abundant(mz, n = n)
    
[docs]    def fragment_percent_of_most_abundant(
            self,
            name,
            percent = 80.0,
            adduct = None,
        ):
        """
        Tells if a fragment has at least certain percent of intensity
        compared to the highest peak.
        """
        
        frag = fragdb.by_name(name, self.ionmode)
        
        if frag is not None:
            
            mz = (
                self.nl(frag[0], adduct = adduct)
                    if frag[6] == 0 else
                frag[0]
            )
            
            return self.mz_percent_of_most_abundant(mz, percent = percent)
    
[docs]    def most_abundant_mz_is(self, mz):
        """
        Tells if the m/z with the highest intensity is `mz`.
        Returns `None` if the fragment name could not be
        found in the database.
        """
        
        result = self.mz_match(self.most_abundant_mz(), mz)
        
        if self.verbose:
            
            self.log.msg(
                '\t\t  -- Is m/z %.03f the most abundant one? -- %s' % (
                    mz,
                    str(result)
                )
            )
        
        return result
    
[docs]    def mz_among_most_abundant(self, mz, n = 2):
        """
        Tells if an m/z is among the most aboundant `n` fragments
        in a spectrum.
        
        Args
        ----
        :param float mz:
            The m/z value.
        :param int n:
            The number of most abundant fragments considered.
        """
        
        self.sort_mz()
        
        i = lookup.find(
            self.mzs[self.irank < n], # intensity rank < n
            mz,
            self.tolerance
        )
        
        self.sort_intensity()
        
        if self.verbose:
            
            self.log.msg(
                '\t\t  -- m/z %.03f is among the %u most abundant? -- %s' % (
                    mz, n, str(i is not None)
                )
            )
        
        return i is not None
    
[docs]    def nl_among_most_abundant(self, nl, n = 2, adduct = None):
        """
        Tells if a neutral loss corresponds to one of the
        most aboundant `n` fragments in a spectrum.
        
        Args
        ----
        :param float nl:
            The mass of the neutral loss.
        :param int n:
            The number of most abundant fragments considered.
        """
        
        result = self.mz_among_most_abundant(
            self.nl(nl, adduct = adduct),
            n = n,
        )
        
        if self.verbose:
            
            self.log.msg(
                '\t\t  -- neutral loss %.03f is among '
                'the %u most abundant? -- %s' % (
                    nl, n, str(result)
                )
            )
        
        return result
    
[docs]    def get_intensity(self, mz):
        """
        Returns the relative intensity of a fragment ion from its m/z.
        Value is `None` if m/z does not present.
        """
        
        i = self.mz_lookup(mz)
        
        if i is not None:
            
            return self.inorm[i]
        
        return None
    
[docs]    def get_nl_intensity(self, nl, adduct = None):
        """
        Returns the relative intensity of a neutral loss fragment ion.
        Value is `None` if neutral loss does not present.
        """
        
        return self.get_intensity(self.nl(nl, adduct = adduct))
    
[docs]    def mz_percent_of_most_abundant(self, mz, percent = 80.0):
        """
        Tells if an m/z has at least certain percent of intensity
        compared to the most intensive fragment.
        
        Args
        ----
        :param float mz:
            The m/z value.
        :param float percent:
            The threshold in percent of the highest intensity.
        
        """
        
        i = self.get_intensity(mz)
        result = i and i >= percent / 100.
        
        if self.verbose:
            
            self.feature.msg(
                '\t\t  -- m/z %.03f has abundance at least %.01f %% of'
                ' the highest abundance? -- %s\n' % (
                    mz, percent, str(result)
                )
            )
        
        return result
    
[docs]    @classmethod
    def match_chtype(cls, value, accepted):
        """
        Matches strings or strings to set of strings, optionally negative.
        Calls `match_chattr` with `typ = basestring`.
        """
        
        return cls.match_chattr(value, accepted, typ = basestring)
    
[docs]    @staticmethod
    def match_chattr(value, accepted, typ = int):
        """
        Args
        ----
        :param int value:
            The actual value.
        :param int,set accepted:
            A single value or a set of values to match against.
            Negative match is possible by providing a tuple with `False`
            as it's first element and the set of not acceptable values
            as the second element.
        """
        
        return (
            accepted is None or
            # simple match
            (isinstance(accepted, typ) and value == accepted) or
            (
                not isinstance(accepted, typ) and (
                    # multiple values
                    value in accepted or (
                        # negation
                        hasattr(accepted, '__getitem__') and
                        accepted[0] == False and
                        value not in accepted[1]
                    )
                )
            )
        )
    
[docs]    @classmethod
    def match_annot(
            cls,
            annot,
            frag_type = None,
            chain_type = None,
            c = None,
            u = None
        ):
        """
        Tests a fragment annotation against criteria of fragment type,
        chain type, carbon count and unsaturation.
        """
        
        return all((
            cls.match_chattr(annot.fragtype, frag_type, typ = basestring),
            cls.match_chattr(annot.chaintype, chain_type, typ = basestring),
            cls.match_chattr(annot.c, c),
            cls.match_chattr(annot.u, u),
        ))
    
[docs]    def highest_fragment_by_chain_type(
            self,
            head = None,
            frag_type = None,
            chain_type = None,
            c = None,
            u = None,
            adduct = None,
        ):
        """
        Returns the highest instensity fragment matching a particular
        chain type.
        Returns fragment index or `None` if no such fragment exists.
        
        Arguments passed to `chain_fragment_type_is`.
        """
        
        frags = self.fragments_by_chain_type(
            head = head,
            frag_type = frag_type,
            chain_type = chain_type,
            c = c,
            u = u,
            adduct = adduct,
        )
        
        try:
            
            return next(frags)
            
        except StopIteration:
            
            return None
    
[docs]    def fragments_by_chain_type(
            self,
            head = None,
            frag_type = None,
            chain_type = None,
            c = None,
            u = None,
            adduct = None,
        ):
        """
        Collects fragments matching a particular chain type.
        Yields indices.
        Arguments passed to `chain_fragment_type_is`.
        """
        
        head = len(self.mzs) if head is None else min(head, len(self.mzs))
        
        for i in xrange(head):
            
            if self.chain_fragment_type_is(
                i,
                frag_type = frag_type,
                chain_type = chain_type,
                c = c,
                u = u,
                return_annot = False,
                adduct = adduct,
            ):
                
                yield i
    
[docs]    def chain_fragment_type_among_most_abundant(
            self,
            n = 2,
            frag_type = None,
            chain_type = None,
            c = None,
            u = None,
            adduct = None,
        ):
        """
        Tells if a particular type of aliphatic chain fragment can be
        found among the `n` highest intensity fragments.
        
        Arguments passed to `chain_fragment_type_is`.
        """
        
        return bool(len(list(
            self.fragments_by_chain_type(
                head = n,
                frag_type = frag_type,
                chain_type = chain_type,
                c = c,
                u = u,
                adduct = adduct,
            )
        )))
    
[docs]    def chain_fragment_type_is(
            self,
            i,
            frag_type = None,
            chain_type = None,
            c = None,
            u = None,
            return_annot = False,
            adduct = None,
        ):
        """
        Tells if an aliphatic chain fragment is a specified type. The type
        should be the string representation of the fragment,
        e.g. `FA-O` for fatty acid minus oxygen fragments.
        
        Returns bool or fragment annotations if `return_annot = True`.
        
        Args
        ----
        :param int i:
            Index of the fragment.
        :param bool return_annot:
            Return iterator with the matching fragment annotations.
        """
        
        if i >= len(self.mzs):
            
            return False
        
        annot = self.annot if adduct is None else self.adduct_annot(adduct)
        
        result = any((
            self.match_annot(an, frag_type, chain_type, c, u)
            for an in annot[i]
        ))
        
        if self.verbose:
            
            criteria = []
            if frag_type is not None:
                criteria.append('of type `%s`' % frag_type)
            if chain_type is not None:
                criteria.append('of chain type `%s`' % chain_type)
            if c is not None:
                criteria.append('with carbon count of %a' % c)
            if u is not None:
                criteria.append('with unsaturation of %a' % u)
            
            self.log.msg(
                '\t\t  -- Fragment #%u (%.03f): '
                'is it a fragment %s? -- %s' % (
                    i,
                    self.mz[i],
                    ' and '.join(criteria),
                    str(result)
                )
            )
        
        if return_annot:
            
            result = (
                an
                for an in annot[i]
                if self.match_annot(an, frag_type, chain_type, c, u)
            )
        
        return result
    
[docs]    def chains_of_type(
            self,
            chain_type = None,
            frag_type = None,
            c = None,
            u = None,
            yield_annot = False,
            adduct = None,
        ):
        """
        Iterates chain fragments matching certain criteria.
        Yields fragment indices or indices with annotations.
        
        Args
        ----
        :param bool yield_annot:
            Yield tuples of indices and annotations instead of indices only.
        """
        
        for i in xrange(len(self.mzs)):
            
            if self.chain_fragment_type_is(
                i = i,
                chain_type = chain_type,
                frag_type = frag_type,
                c = c,
                u = u,
                adduct = adduct,
            ):
                
                if yield_annot:
                    
                    for annot in self.chain_fragment_type_is(
                        i = i,
                        chain_type = chain_type,
                        frag_type = frag_type,
                        c = c,
                        u = u,
                        return_annot = True,
                        adduct = adduct,
                    ):
                        
                        yield i, annot
                    
                else:
                    
                    yield i
    
[docs]    def has_chain_fragment_type(
            self,
            chain_type = None,
            frag_type = None,
            c = None,
            u = None,
            adduct = None,
        ):
        """
        Tells if at least one fragment matches certain criteria.
        Arguments passed to `chain_fragment_type_is`.
        """
        
        return self.highest_fragment_by_chain_type(
            chain_type = chain_type,
            frag_type = frag_type,
            c = c,
            u = u,
            adduct = adduct,
        ) is not None
    
[docs]    def matching_chain_combinations(
            self,
            record,
            head = None,
            intensity_threshold = None,
            expected_intensities = None,
            no_intensity_check = False,
            chain_param = (),
            adduct = None,
        ):
        """
        Provides a way to see if specific chain combinations exist.
        The database record defines the chain layout of the molecule.
        Further arguments are passed to `chain_combinations`.
        The `chain_param` tuple contains dicts to match against chain
        fragments. All of these dicts must match at least one fragment
        identification. Only combinations matching all criteria yielded.
        
        Args
        ----
        :param lipproc.LipidRecord record:
            A lipid database record matching the MS1 m/z.
        :param int head:
            Consider only the n most intensive fragments.
        :param float intensity_threshold:
            Consider only fragments with intensity higher than threshold.
            Relative to highest fragment, between 0 and 1.
        :param expected_intensities:
            See at `intensity_ratios`.
        :param bool no_intensity_check:
            Completely skip checking intensity ratios.
        :param tuple chain_param:
            Tuple of dicts. Each dict contains criteria for one chain moiety.
            Keys can be `chain_type`, `frag_type`, `c` and `u`.
            These can be single str or int values or sets of multiple
            values. If omitted or `None` any value will pass the filter.
            An empty tuple which is the default value will pass through
            everything, this is equivalent with calling `chain_combinations`.
        """
        
        def match(key, param, value):
            
            return (
                key not in param or
                param[key] is None or (
                    type(param[key]) in {int, str} and
                    value == param[key]
                ) or (
                    type(param[key]) in {set, list, tuple} and
                    value in param[key]
                )
            )
        
        if (
            record.chainsum and
            len(record.chainsum) > 1 and
            len(chain_param) == 1
        ):
            
            chain_param = chain_param * len(record.chainsum)
        
        for chains, details in self.chain_combinations(
            record,
            head = None,
            intensity_threshold = 0,
            expected_intensities = None,
            no_intensity_check = False,
            frag_types = None,
            fragment_details = True,
            adduct = adduct,
        ):
            
            if (
                not chain_param or
                all((
                    not param or
                    any((
                        all((
                            match('chain_type', param, ch.typ),
                            match('frag_type', param, details.fragtype[i]),
                            match('c', param, ch.c),
                            match('u', param, ch.u),
                        ))
                        for i, ch in enumerate(chains)
                    ))
                    for param in chain_param
                ))
            ):
                
                yield chains, details
    
[docs]    def has_chain_combination(
            self,
            record,
            head = None,
            intensity_threshold = None,
            expected_intensities = None,
            no_intensity_check = False,
            chain_param = (),
            adduct = None,
        ):
        """
        Tells if a certain chain combination exists in the scan.
        
        Arguments passed to `matching_chain_combinations`.
        """
        
        ccomb = self.matching_chain_combinations(
            record = record,
            head = head,
            intensity_threshold = intensity_threshold,
            expected_intensities = expected_intensities,
            no_intensity_check = no_intensity_check,
            chain_param = chain_param,
            adduct = adduct,
        )
        
        try:
            
            _ = next(ccomb)
            
            return True
            
        except StopIteration:
            
            return False
    
    def _matching_chain_pairs(
            self,
            record,
            chain_type = None,
            frag_type = None,
            c = None,
            u = None,
            partner_chain_types = None,
            partner_frag_types = None,
            count_only = False,
            adduct = None,
        ):
        
        # small caching of constraint matching
        type_pos = {}
        
        def get_positions(self, frag_type):
            
            if frag_type not in type_pos:
                
                type_pos[frag_type] = self.positions_for_frag_type(
                    record, frag_type
                )
            
            return type_pos[frag_type]
        # ##
        
        for i, iannot in self.chains_of_type(
            chain_type = chain_type,
            frag_type = frag_type,
            c = c,
            u = u,
            yield_annot = True,
            adduct = adduct,
        ):
            
            partner_c = record.chainsum.c - annot.c
            partner_u = record.chainsum.u - annot.u
            
            if partner_c < 1 or partner_u < 0:
                
                continue
            
            pos_i = get_positions(iannot.fragtype)
            
            for j, jannot in self.chains_of_type(
                c = partner_c,
                u = partner_u,
                yield_annot = True,
                adduct = adduct,
            ):
                
                if (
                    partner_chain_types is None or
                    jannot.chaintype in partner_chain_types
                ) and (
                    partner_frag_types is None or
                    jannot.fragtype in partner_frag_types
                ):
                    
                    pos_j = get_positions(jannot.fragtype)
                    
                    if (
                        not pos_i or
                        not pos_j or (
                            len(pos_i) == 1 and
                            len(pos_j) == 1 and
                            not pos_i - pos_j
                        )
                    ):
                        
                        continue
                    
                    yield (
                        lipproc.Chain(
                            
                        )
                    )
    
[docs]    def positions_for_frag_type(self, record, frag_type):
        """
        Returns the possible chain positions for a record and a fragment type.
        """
        
        # constraints for the fragment type
        constr = fragdb.constraints(frag_type, self.ionmode)
        # set of possible positions of the chain
        # which this fragment originates from
        return lipproc.match_constraints(record, constr)[1]
    
[docs]    def is_chain(self, i, adduct = None):
        """
        Examines if a fragment has an aliphatic chain.
        """
        
        annot = self.adduct_annot(adduct)
        
        result = any(not np.isnan(an.c) for an in annot[i])
        
        if self.verbose:
            
            self.log.msg(
                '\t\t -- Fragment #%u (%.03f)'
                'has an aliphatic chain? -- %s' % (
                    i,
                    self.mzs[i],
                    str(result)
                )
            )
        
        return result
    
[docs]    def is_chain_type(self, i, typ = 'FA', adduct = None):
        """
        Checks if a fragment might origin from a certain aliphatic
        chain type (e.g. `FA` -- fatty acyl, `FAL` -- fatty alkyl,
        `Sph` -- sphingosin base).
        """
        
        return self.chain_fragment_type_is(
            i, chain_type = typ, adduct = adduct
        )
    
[docs]    def is_fa(self, i, adduct = None):
        """
        Tells if a fragment origins from a fatty acyl moiety.
        """
        
        return self.is_chain_type(i, adduct = adduct)
    
[docs]    def is_fal(self, i, adduct = None):
        """
        Tells if a fragment origins from a fatty alkyl moiety.
        """
        
        return self.is_chain_type(i, 'FAL', adduct = adduct)
    
[docs]    def is_sph(self, i, adduct = None):
        """
        Tells if a fragment origins from a shpingosin backbone.
        """
        
        return self.is_chain_type(i, 'Sph', adduct = adduct)
    
[docs]    def is_type(self, i, typ, adduct = None):
        """
        Tells if a fragment is a certain type.
        """
        
        return self.chain_fragment_type_is(
            i, frag_type = typ, adduct = adduct
        )
    
[docs]    def annot_by_type(
            self,
            i,
            chain_type = None,
            frag_type = None,
            adduct = None,
        ):
        """
        Returns the annotations matching certain types.
        """
        
        annot = self.adduct_annot(adduct)
        
        return tuple(
            an
            for an in annot[i]
            if (
                self.match_chtype(an.chaintype, chain_type) and
                self.match_chtype(an.fragtype,  frag_type)
            )
        )
    
[docs]    def cu_by_type(
            self,
            i,
            chain_type = None,
            frag_type = None,
            adduct = None,
        ):
        """
        Returns `(carbon count, unsaturation)` tuples for fragment `i`
        considering only the the requested chain types and fragment types.
        """
        
        return tuple(
            (a.c, a.u)
            for a in
            self.annot_by_type(
                i,
                chain_type = chain_type,
                frag_type = frag_type,
                adduct = adduct,
            )
        )
    
    def _build_chain_list(self, annot = None):
        """
        Builds a list of chains which facilitates the anlysis of chain
        combinations.
        """
        
        annot = annot if type(annot) is np.ndarray else self.annot
        
        return tuple(
            ChainFragment(
                a.c, a.u, a.fragtype, a.chaintype, i, self.intensities[i]
            )
            for i, aa in enumerate(annot)
            for a in aa
            if a.c and not np.isnan(a.c)
        )
    
[docs]    def build_chain_list(self, rebuild = False):
        
        if (
            not rebuild and
            hasattr(self, 'chain_list')
        ):
            return
        
        self.chain_list = self._build_chain_list()
    
[docs]    def chain_among_most_abundant(
            self,
            head = 1,
            chain_type = None,
            frag_type = None,
            c = None,
            u = None,
            min_mass = None,
            skip_non_chains = False,
            adduct = None,
        ):
        """
        Returns `True` if the defined type of chain fragment can be found
        among the most abundant fragments.
        """
        
        if self.verbose:
            
            self.log.msg(
                '\t\t -- Checking for certain type of chain among the top '
                '%u fragments.' % head
            )
        
        result = any((
            self.chain_fragment_type_is(
                i,
                frag_type = frag_type,
                chain_type = chain_type,
                c = c,
                u = u,
                adduct = adduct,
            )
            for i in (
                xrange(head)
                    if not skip_non_chains else
                itertools.islice(
                    (
                        i for i in xrange(len(self.mzs))
                            if (
                                not skip_non_chains or self.is_chain(i)
                            ) and (
                                min_mass is None or self.mzs[i] >= min_mass
                            )
                    ),
                    head
                )
            )
        ))
        
        if self.verbose:
            
            self.log.msg(
                '\t\t -- Checked certain type of chain among the top '
                '%u fragments. -- %s' % (head, str(result))
            )
        
        return result
    
[docs]    def get_most_abundant_chain(
            self,
            head = 1,
            frag_type = None,
            chain_type = None,
            c = None,
            u = None,
            adduct = None,
        ):
        """
        Looks up the most abundant fatty acid fragment of the given type.
        Returns the fragment index.
        """
        
        for i in xrange(len(self)):
            
            if self.chain_fragment_type_is(
                i,
                frag_type = frag_type,
                chain_type = chain_type,
                c = c,
                u = u,
                adduct = adduct,
            ):
                
                return i
    
[docs]    def chain_percent_of_most_abundant(
            self,
            percent,
            frag_type = None,
            chain_type = None,
            c = None,
            u = None,
            adduct = None,
        ):
        """
        Tells if a certain chain present with an abundance at least the
        given percent of the most abundant fragment.
        
        Args
        ----
        :param float percent:
            Percentage, between 0 and 100.
        """
        
        result = any((
            self.chain_among_most_abundant(
                i,
                frag_type = frag_type,
                chain_type = chain_type,
                c = c,
                u = u,
                adduct = adduct,
            )
            for i in
            itertools.takewhile(
                lambda i:
                    self.inorm[i] > percent / 100.0,
                xrange(len(self.mzs))
            )
        ))
        
        return result
    
[docs]    def mz_most_abundant_fold(self, mz, fold):
        """
        Tells if an m/z is the most abundant fragment
        and it has at least a certain fold higher intensity
        than any other fragment.
        
        Args
        ----
        :param float mz:
            The m/z value.
        :param float fold:
            The m/z must be this times higher than any other.
        """
        
        result = (
            self.most_abundant_mz_is(mz) and (
                len(self.mzs) == 1 or
                self.intensities[1] * fold <= self.intensities[0]
            )
        )
        
        if self.verbose:
            
            self.log.msg(
                '\t\t  -- m/z %.03f is at least %u times higher than '
                'any other? -- %s\n' % (mz, fold, str(result))
            )
        
        return result
    
[docs]    def cer_fa_test(self, i_fa, i_sph, adduct = None):
        
        return (
            self.chain_fragment_type_is(
                i_fa,
                frag_type = 'FA+C2+NH2-O',
                adduct = adduct,
            ) and
            self.chain_fragment_type_id(
                i_sph,
                frag_type = 'Sph-C2H4-NH2-H2O',
                adduct = adduct,
            ) and
            self.intensities[i_fa] > self.intensities[i_sph] * 2
        )
    
[docs]    def has_chain_combinations(self, rec, adduct = None, **kwargs):
        """
        Calls `chain_combinations` only to check if at least one
        conbination explicitely confirmed.
        """
        
        ccomb = self.chain_combinations(rec, adduct = adduct, **kwargs)
        
        try:
            
            _ = next(ccomb)
            
            return True
            
        except StopIteration:
            
            return False
    
[docs]    def chain_combinations(
            self,
            rec,
            head = None,
            intensity_threshold = 0,
            expected_intensities = None,
            no_intensity_check = False,
            frag_types = None,
            fragment_details = None,
            adduct = None,
        ):
        """
        Finds all combinations of chain derived fragments matching the
        total carbon count and unsaturation in a database record.
        
        Yields tuple of chains (`lipproc.Chain` objects).
        
        Arguments not listed here explained at `frags_for_positions`.
        
        Args
        ----
        :param lipproc.LipidRecord rec:
            The database record to match against.
        :param bool no_intensity_check:
            Completely skip checking intensity ratios.
        :param float intensity_threshold:
            Only fragments with intensities above this threshold will be
            considered. Intensities relative to the highest, between 0 and 1.
        :param tuple frag_types:
            See at `frags_for_positions`.
        """
        
        if not rec.chainsum and not rec.chains:
            
            return
        
        self.build_chain_list()
        
        chainsum = rec.chainsum or lipproc.sum_chains(rec.chains)
        
        frags_for_position = self.frags_for_positions(
            rec,
            head = head,
            intensity_threshold = intensity_threshold,
            frag_types = frag_types,
            adduct = adduct,
        )
        
        if len(frags_for_position) != len(rec.chainsum.typ):
            # if one or more chains have no corresponding fragment
            # we do not yield anything;
            # for finding missing those chains `missing_chains`
            # can be used
            return
        
        # iterate all combinations
        for frag_comb in itertools.product(
            *(
                # making a sorted list of lists from the dict
                i[1] for i in
                sorted(frags_for_position.items(), key = lambda i: i[0])
            )
        ):
            
            if (
                sum(frag.c for frag in frag_comb) == chainsum.c and
                sum(frag.u for frag in frag_comb) == chainsum.u
            ):
                
                if (
                    # bypass intensity check
                    no_intensity_check or
                    self._intensity_check(
                        frag_comb, chainsum, expected_intensities
                    )
                ):
                    
                    # now all conditions satisfied:
                    yield self._chains_frag_comb(
                        frag_comb, chainsum, details = fragment_details
                    )
    
[docs]    def frags_for_positions(
            self,
            rec,
            head = None,
            intensity_threshold = 0,
            frag_types = None,
            adduct = None,
        ):
        """
        Returns the possible fragments for each positions (sn1, sn2 in
        glycerophospholipids, sphingosine base and N-acyl in sphingolipids,
        etc).
        
        :param int head:
            If `None` or `numpy.inf` all fragment ions will be considered,
            otherwise only the first most aboundant until the number `head`.
        :param float intensity_threshold:
            Only fragments with intensities above this threshold will be
            considered. Intensities relative to the highest, between 0 and 1.
        :param tuple frag_types:
            Limit the query to certain fragment types in addition to
            built in fragment constraints and other criteria.
            A tuple of tuples with fragment type names can be provided
            each for one position with None values where default fragment
            types should be used. E.g. `(('FA_mH', 'Lyso_PA'), None)` means
            the chain in first position might be found as fatty acid minus
            hydrogen fragment or lysophosphatidic acid fragment, while the
            second position could be anything allowed by the built in
            constraints.
        """
        
        frags_for_position = collections.defaultdict(list)
        
        chain_list = self.adduct_chain_list(adduct)
        
        for frag in chain_list:
            
            if (
                (head and frag.i >= head) or
                self.inorm[frag.i] < intensity_threshold
            ):
                
                break
            
            chpos = self.positions_for_frag_type(rec, frag.fragtype)
            
            for ci in chpos:
                
                if (
                    # frag_types constraints
                    not frag_types or
                    not frag_types[ci] or
                    frag.fragtype in frag_types[ci]
                ):
                    
                    frags_for_position[ci].append(frag)
        
        return dict(frags_for_position)
    
[docs]    def intensity_ratios(
            self,
            intensities,
            frag_indices = None,
            expected = None,
            logbase = None,
        ):
        """
        Tells if the ratio of a list of intensities fits the one in
        `expected` or is more or less even if `expected` is `None`.
        
        :param list intensities:
            List of intensities.
        :param list expected:
            List with expected intensity proportions. E.g. `[1, 1, 2]`
            means the third ion is twice higher intense than the 2 others.
        :param int logbase:
            The fold difference tolerance when comparing intensities.
            E.g. if this is 2, then an almost twice less or more intense
            ion will considered to have similar intensity.
        """
        
        logbase = settings.get('chain_fragment_instensity_ratios_logbase')
        
        if len(intensities) == 1:
            
            return True
        
        if any(i <= 0.0 for i in intensities):
            
            raise ValueError(
                'Negative intensity value encountered'
                '(sample=%s, ion mode=%s, scan=%u)' % (
                    str(self.sample), self.ionmode, self.scan_id
                )
            )
        
        frag_indices = frag_indices or list(range(len(intensities)))
        # to know if one fragment contributes more than one times;
        # intensities divided by the times the fragment is incident
        cntr = collections.Counter(frag_indices)
        
        # by default expecting more or less equal intensities
        if expected is None:
            
            expected = [1.0] * len(intensities)
        
        # intensities corrected by the expected and the counts
        intcorr = [
            ins / expected[i] / cntr[ind]
            for (i, ins), ind in zip(enumerate(intensities), frag_indices)
        ]
        
        return (
            all((
                math.log(co[0], logbase) - math.log(co[1], logbase) <= 1
                for co in itertools.combinations(intcorr, 2)
            ))
        )
    
    def _intensity_check(
            self,
            frag_comb,
            chainsum,
            expected_intensities = None
        ):
        """
        Performs the chain intensity ratio check according to settings.
        """
        
        return (
            not (
                # need to check intensity ratios
                (chainsum.typ[0] == 'Sph' and self.check_ratio_s) or
                (chainsum.typ[0] != 'Sph' and self.check_ratio_g) or
                expected_intensities
            ) or
            self.intensity_ratios(
                # intensity ratios are ok
                intensities = tuple(f.intensity for f in frag_comb),
                frag_indices = tuple(f.i for f in frag_comb),
                expected = expected_intensities,
                logbase = self.iratio_logbase
            )
        )
    
    def _chains_frag_comb(
            self,
            frag_comb,
            chainsum,
            details = None,
            missing_position = None,
            missing_chain = None,
        ):
        """
        Returns a tuple of chains from a fragment annotation combination
        and a database record chain summary object.
        
        Potentially includes a missing chain which does not yield any
        fragment.
        """
        
        # boolean: whether we provide details or not
        details = self.chain_details if details is None else details
        
        return (
            tuple(
                lipproc.Chain(
                    c = frag_comb[ifrag].c,
                    u = frag_comb[ifrag].u,
                    typ = frag_comb[ifrag].chaintype,
                    attr = lipproc.ChainAttr(
                        # take the sphingosine base type
                        # from the chainsum of the record
                        sph = chainsum.attr[ichain].sph,
                        ether = frag_comb[ifrag].chaintype == 'FAL',
                        oh = chainsum.attr[ichain].oh
                    )
                )
                if ifrag is not None else
                missing_chain
                # chain indices and fragment indices
                for ichain, ifrag in iterator_insert(
                    len(chainsum),
                    missing_position,
                )
            ),
            ChainIdentificationDetails(
                rank     = tuple(
                    frag_comb[ifrag].i
                    if ifrag is not None else None
                    for ichain, ifrag in iterator_insert(
                        len(chainsum),
                        missing_position,
                    )
                ),
                i        = tuple(
                    self.inorm[frag_comb[ifrag].i]
                    if ifrag is not None else None
                    for ichain, ifrag in iterator_insert(
                        len(chainsum),
                        missing_position,
                    )
                ),
                fragtype = tuple(
                    frag_comb[ifrag].fragtype
                    if ifrag is not None else None
                    for ichain, ifrag in iterator_insert(
                        len(chainsum),
                        missing_position,
                    )
                ),
            ) if details else None
        )
    
[docs]    def missing_chain(
            self,
            rec,
            missing_position = 1,
            head = None,
            intensity_threshold = 0,
            expected_intensities = None,
            no_intensity_check = False,
            frag_types = None,
            adduct = None,
        ):
        """
        Finds ''missing'' chains i.e. which could complement the chains
        identified among the fragments to fit the species in the record.
        
        Yields tuples with first element a tuple of identified chains and
        as second element the missing chain.
        
        Works a similar way to `chain_combinations`.
        
        Args
        ----
        :param int missing_position:
            Position of the missing chain. 0, 1, 2 are sn1, sn2 and sn3
            positions on glycerol, 0 and 1 are sphingosine base and
            N-acyl in sphingolipids, respectively.
            By default is 1 (sn2 or N-acyl).
        """
        
        chain_list = self.adduct_chain_list(adduct = adduct)
        
        chainsum = rec.chainsum or lipproc.sum_chains(rec.chains)
        
        frags_for_position = self.frags_for_positions(
            rec,
            head = head,
            intensity_threshold = intensity_threshold,
            frag_types = frag_types,
            adduct = adduct,
        )
        
        if missing_position >= len(rec.chainsum.typ):
            
            raise ValueError(
                'No chain known at position %u' % missing_position
            )
        
        if missing_position in frags_for_position:
            
            chains_at_missing = frags_for_position[missing_position]
            del frags_for_position[missing_position]
            
        else:
            
            chains_at_missing = []
        
        # iterate all combinations
        for frag_comb in itertools.product(
            *(
                # making a sorted list of lists from the dict
                i[1] for i in
                sorted(iteritems(frags_for_position), key = lambda i: i[0])
            )
        ):
            
            # if more than one chain missing
            if len(rec.chainsum) - len(frag_comb) > 1:
                
                continue
            
            missing_c = chainsum.c - sum(frag.c for frag in frag_comb)
            missing_u = chainsum.u - sum(frag.u for frag in frag_comb)
            
            # do not yield impossible values
            if missing_c < 1 or missing_u < 0 or missing_u > missing_c - 1:
                
                continue
            
            if (
                # bypass intensity check
                no_intensity_check or
                self._intensity_check(
                    frag_comb, chainsum, expected_intensities
                )
            ):
                
                missing_chain = lipproc.Chain(
                    c = missing_c,
                    u = missing_u,
                    typ = chainsum.typ[missing_position],
                    attr = chainsum.attr[missing_position]
                )
                
                # now all conditions satisfied:
                yield self._chains_frag_comb(
                    frag_comb,
                    chainsum,
                    missing_position = missing_position,
                    missing_chain = missing_chain,
                )
    
[docs]    def cu_complete(self, chainsum, chain = None, c = None, u = None):
        """
        Returns the carbon count and unsaturation needed to complete
        the `chain` or `c` and `u` to fit the `chainsum`.
        
        Returns tuple of c and u.
        """
        
        c = c or chain.c
        u = u or chain.u
        
        return chainsum.c - c, chainsum.u - u
    
[docs]    def iterrecords(self, adducts = None):
        """
        Iterates MS1 records.
        Yields tuple of adduct type and record.
        """
        
        for add, recs in iteritems(self.ms1_records):
            
            if adducts is None or add in adducts:
                
                for rec in recs[1]:
                    
                    yield add, rec
    
[docs]    def records_by_type(self, headgroup, sub = (), adducts = None):
        """
        Iterates MS1 database records with a certain headgroup and subtype.
        """
        
        sub = (
            sub
                if type (sub) is set else
            set(sub)
                if type(sub) is list or type(sub) is tuple else
            set([sub])
        )
        
        for add, rec in self.iterrecords(adducts = adducts):
            
            if rec.hg and rec.hg.main == headgroup and set(rec.hg.sub) == sub:
                
                yield rec
    
[docs]    def first_record(self, headgroup, sub = (), adducts = None):
        """
        Returns the first MS1 database record matching headgroup and subtype.
        """
        
        recbytyp = self.records_by_type(
            headgroup, sub = sub, adducts = adducts
        )
        
        try:
            
            return next(recbytyp)
            
        except StopIteration:
            
            return None
    
[docs]    def identify(self, adducts = None):
        
        result = {}
        
        for add, rec in self.iterrecords(adducts):
            
            if rec.hg is None:
                
                continue
            
            rec_str = rec.summary_str()
            
            if rec_str not in result and rec.hg in idmethods[self.ionmode]:
                
                method = idmethods[self.ionmode][rec.hg]
                
                adduct = None if add in {'[M+H]+', '[M-H]-'} else add
                
                result[rec_str] = tuple(
                    method(
                        record = rec,
                        scan = self,
                        adduct = adduct,
                    ).identify()
                )
        
        return result
    
    #
    # Sphingolipids
    #
    
[docs]    def cer1p_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Ceramide-1-phosphate.

        **Specimen:**
        
        - GLTPD1 - 616.47
      
        **Principle:**
        
        - The most abundant fragment is 78.9591 metaphosphate.
      - If 96.9696 phosphate present adds to the score.
        
        """
        
        score = 0
        fattya = set([])
        if self.most_abundant_mz_is(78.95905658):
            score += 5
            if self.has_mz(96.96962158):
                score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def hexcer_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Hexosyl-Ceramide.

        **Specimen:**
        
        - GLTP - 744.5627
      
        **Principle:**
        
        - Hexose fragments 71.0115, 89.0220 and 101.0219 must present.
      
        """
        
        score = 0
        fattya = set([])
        
        if all(map(lambda mz: self.mz_among_most_abundant(mz, n = 10),
                   # these are 3 fragments found at GLTP
                   [71.0115000, 89.0220000, 101.021900])):
            
            score += 5
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def hexceroh_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Hexosyl-Ceramide-OH
        ('t'). This method is the same as `hexcer_neg_1`.

        **Specimen:**
        
        - GLTP - 760.557
      
        **Principle:**
        
        - Hexose fragments 71.0115, 89.0220 and 101.0219 must present.
      
        """
        
        return self.hexcer_neg_1()
    
[docs]    def sm_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Sphingomyeline.

        **Specimen:**
        
        - GLTPD1 - 745.55
      
        **Principle:**
        
        - Must have a neutral loss of CH3+COOH (60.0211).
      - Phosphate+choline-CH3 fragment 168.0431 must be present.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.mz_among_most_abundant(168.0431206) and self.has_nl(60.02113):
            score += 5
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def sph1p_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Spingosine-1-phosphate.

        **Specimen:**
        
        - Only observed in standard.
      
        **Principle:**
        
        - Phosphate 78.9590 must be present.
      
        """
        
        score = 0
        fattya = set([])
        
        if self.has_mz(78.95905658):
            score += 5
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def cer_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is Ceramide.

        **Specimen:**
        
        - SEC14L1 - 582.509
      
        **Principle:**
        
        - A Ceramide backbone fragment must be among the 2 most abundant.
      - Ceramide backbone fragments lighter by N or C2N but same carbon
          count and unsaturation add to the score.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.fa_among_most_abundant('CerFA', n = 2):
            
            score += 5
            fattya = self.fa_combinations('Cer', sphingo = True)
            fa_h_ccs = self.matching_fa_frags_of_type('Cer', 'CerFA(')
            
            for fa_h_cc in fa_h_ccs:
                
                for fa_other in [
                    '[CerFA-N(C%u:%u)-]-',
                    '[CerFA-C2N(C%u:%u)-]-']:
                    
                    if self.frag_name_present(fa_other % fa_h_cc):
                        
                        score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def cerp_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Ceramide-1-phosphate.
        Gives similar result as Sphingosine-1-phosphate.

        **Specimen:**
        
        - GLTPD1 - 616.47
      
        **Principle:**
        
        - The most abundant fragment must be 78.9591 metaphosphate.
      - Presence of 96.9696 phosphate increase the score.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.most_abundant_mz_is(78.95905658):
            
            score += 5
            
            if self.has_mz(96.96962158):
                
                score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def cerp_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Ceramide-1-phosphate.

        **Specimen:**
        
        - GLTPD1 + 728.59, 590.45, 702.58, 618.430, 616.415, 640.409
      
        **Principle:**
        
        - A sphingosine fragment with double H2O loss must be among the three
        highest abundant fragments.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.fa_among_most_abundant('-H2O-H2O+]+', n = 3, sphingo = True):
            score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def adduct(self, adduct, reset = False):
        """
        Creates a copy of the current scan assuming that the precursor
        is a certain adduct. The precursor will be converted to [M+H]+
        or [M-H]- adduct and neutral losses will be calculated accordingly.
        """
        
        if adduct in self.adducts and not reset:
            
            return
        
        ad2ex = settings.get('ad2ex')[1][self.ionmode][adduct]
        ex2ad = 'remove_h' if self.ionmode == 'neg' else 'add_h'
        
        fake_precursor = (
            getattr(
                mzmod.Mz(
                    getattr(
                        mzmod.Mz(self.precursor),
                        ad2ex
                    )()
                ),
                ex2ad
            )()
        )
        
        annot = self.get_annot(fake_precursor)
        
        chain_list = self._build_chain_list(annot = annot)
        
        self.adducts[adduct] = {
            'fake_precursor': fake_precursor,
            'annot': annot,
            'chain_list': chain_list,
        }
    
[docs]    def adduct_annot(self, adduct = None):
        """
        Gets the annotations for a certain adduct.
        """
        
        return self.adduct_data('annot', adduct = adduct)
    
[docs]    def adduct_chain_list(self, adduct = None):
        """
        Gets the chain list for a certain adduct.
        """
        
        return self.adduct_data('chain_list', adduct = adduct)
    
[docs]    def adduct_data(self, name, adduct = None):
        
        if adduct is None:
            
            return getattr(self, name)
        
        self.adduct(adduct)
        
        return self.adducts[adduct][name]
    
[docs]    def get_ms1_records(
            self,
            hg,
            subtype = None,
            sph = None,
            ether = None,
            oh = None,
            databases = None,
        ):
        """
        Iterates MS1 records for a given type.
        
        Yields tuples of record, adduct type and error in ppm
        """
        
        subtype = subtype or ()
        
        hg = (
            hg
            if isinstance(hg, lipproc.Headgroup) else
            lipproc.Headgroup(main = hg, sub = subtype)
        )
        
        for add, recs in iteritems(self.ms1_records):
            
            for rec_mz, rec, err_ppm in zip(*recs):
                
                if (
                    rec.hg == hg and (
                        databases is None or rec.lab.db in databases
                    ) and (
                        sph is None or rec.chainsum.attr.sph == sph
                    ) and (
                        ether is None or rec.chainsum.attr.ether == ether
                    ) and (
                        oh is None or rec.chainsum.attr.oh == oh
                    )
                ):
                    
                    yield rec, add, err_ppm


[docs]class AbstractMS2Identifier(object):
    
    class_methods = {}
    subclass_methods = {}
    
    def __init__(
            self,
            record,
            scan,
            adduct = None,
            missing_chains = None,
            explicit_and_implicit = False,
            must_have_chains = True,
            chain_comb_args = {},
            missing_chain_args = {},
        ):
        
        self.score = 0
        self.max_score = 0
        self.rec = record
        self.scn = scan
        self.add = adduct
        self.missing_chains = (
            missing_chains if missing_chains is not None else
            tuple(range(len(record.chainsum))) # any chain can be missing
        )
        self.chain_comb_args = chain_comb_args
        self.missing_chain_args = missing_chain_args or self.chain_comb_args
        self.explicit_and_implicit = explicit_and_implicit
        self.must_have_chains = must_have_chains
        
        self.scores = {}
    
[docs]    def identify(self):
        
        if not self.rec.hg:
            
            return
        
        self.confirm_class()
        
        chains_confirmed = False
        
        for chains in self.confirm_chains_explicit():
            
            yield MS2Identity(
                max(self.score, 0),
                self.max_score,
                self.percent_score(),
                self.rec.hg,
                self.rec.chainsum,
                chains = chains[0],
                chain_details = chains[1],
                scan_details = self.scn.scan_details,
            )
            chains_confirmed = True
        
        if not chains_confirmed or self.explicit_and_implicit:
            
            for chains in self.confirm_chains_implicit():
                
                yield MS2Identity(
                    max(self.score, 0),
                    self.max_score,
                    self.percent_score(),
                    self.rec.hg,
                    self.rec.chainsum,
                    chains = chains[0],
                    chain_details = chains[1],
                    scan_details = self.scn.scan_details,
                )
                
                chains_confirmed = True
        
        if not chains_confirmed and not self.must_have_chains and self.score:
            
            yield MS2Identity(
                max(self.score, 0),
                self.max_score,
                self.percent_score(),
                self.rec.hg,
                self.rec.chainsum,
                chains = None,
                chain_details = None,
                scan_details = self.scn.scan_details,
            )
    
[docs]    def percent_score(self):
        """
        Returns the score as a percentage of the maximum possible score.
        
        Zero maximum score means something is wrong, then it returns 200.
        """
        
        return (
            max(int(np.round(self.score / self.max_score * 100.)), 0)
            if self.max_score else
            200
        )
    
[docs]    def confirm_class(self):
        """
        In this base class pass through everything.
        Most of the subclasses should override this.
        """
        
        self.score = 0
        
        if self.rec.hg is not None and self.rec.hg.main in self.class_methods:
            
            score, max_score = getattr(
                self,
                self.class_methods[self.rec.hg.main]
            )()
            
            self.score += score
            self.max_score += max_score
    
[docs]    def confirm_subclass(self):
        
        subclasses = self.rec.hg.sub or ('empty',)
        
        if self.rec.hg is not None:
            
            for sub in subclasses:
                
                if sub not in self.scores and sub in self.subclass_methods:
                    
                    score, max_score = getattr(
                        self,
                        self.subclass_methods[sub]
                    )()
                    
                    self.scores[sub] = score
                    self.score += score
                    self.max_score += max_score
    
[docs]    def confirm_chains_explicit(self):
        
        return self.scn.chain_combinations(self.rec, **self.chain_comb_args)
    
[docs]    def confirm_chains_implicit(self):
        
        for missing in self.missing_chains:
            
            for chain_comb in self.scn.missing_chain(
                self.rec,
                missing_position = missing,
                **self.missing_chain_args
            ):
                
                yield chain_comb
    
[docs]    def matching_chain_combinations(
            self,
            chain_param1,
            chain_param2,
            score_method = lambda ccomb: (min(ccomb, 3) * 2, 6),
        ):
        
        ccomb = len(list(
            self.scn.matching_chain_combinations(
                self.rec,
                chain_param = (chain_param1, chain_param2),
            )
        ))
        
        score, max_score = score_method(ccomb)
        
        self.score += score
        self.max_score += score
    
[docs]    def check_lyso(self, score_threshold = 5):
        """
        Checks whether the this mass has been identified in the database
        as a lyso species and calls the corresponding lyso identification
        method.
        
        Returns ``True`` if the score from the lyso is larger than
        ``score_threshold``.
        """
        
        rec_lyso = self.scn.first_record(self.rec.hg.main, sub = ('Lyso',))
        
        if rec_lyso:
            
            lyso_hg = lipproc.Headgroup(
                main = self.rec.hg.main,
                sub = ('Lyso',),
            )
            lyso = idmethods[self.scn.ionmode][lyso_hg](rec_lyso, self.scn)
            lyso.confirm_class()
            
            return lyso.score > score_threshold
        
        return False


#
# Lipid identification methods
#

#
# Fatty acids
#

[docs]class FA_Negative(AbstractMS2Identifier):
    """
    Examines if a negative mode MS2 spectrum is a fatty acid.
    Here we only check if the most abundant fragment is the
    fatty acid itself.

    **Specimen:**
    
    - in vitro FABP1 -
    
    **Principle:**
    
    - The most abundant fragment must be a fatty acid which matches
      the carbon count and the unsaturation of the whole molecule.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {
                'head': 1,
                'frag_types': {
                    0: {'FA-H'}
                }
            },
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 10
        
        if (
            self.rec.chainsum and
            self.scn.chain_among_most_abundant(
                frag_type = 'FA-H',
                c = self.rec.chainsum.c,
                u = self.rec.chainsum.u,
            )
        ):
            
            self.score = 10


[docs]class FA_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a fatty acid.
    Here we only check if the most abundant fragment is the
    fatty acid itself.

    **Specimen:**
    
    - Not known
    
    **Principle:**
    
    - The most abundant fragment must be a fatty acid which matches
      the carbon count and the unsaturation of the whole molecule.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {
                'head': 1
            }
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 10
        
        if (
            self.rec.chainsum and
            self.scn.chain_among_most_abundant(
                frag_type = 'FA+H',
                c = self.rec.chainsum.c,
                u = self.rec.chainsum.u,
            )
        ):
            
            self.score = 10

#
# Glycerolipids
#

[docs]class DAG_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a DAG.

    **Specimen:**
    
    - in vivo: SEC14L2 + 584.52
    - in vitro: BNIP2 + 770.67
    
    **Principle:**
    
    - Combination of fatty acid fragments among the 10 most abundant
      fragments must match the expected carbon count and unsaturation.
    - If these are among the 5 highest fragments the score is higher.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 6
        
        if self.scn.has_chain_combinations(self.rec, head = 10):
            
            self.score += 4
        
        if self.scn.has_chain_combinations(self.rec, head = 6):
            
            self.score += 2


[docs]class DAG_Negative(AbstractMS2Identifier):
    """
    Examines if a negative mode MS2 spectrum is a DAG.

    **Specimen:**
    
    - We don't have yet.
    
    **Principle:**
    
    - Combination of fatty acid fragments among the 10 most abundant
      fragments must match the expected carbon count and unsaturation.
    - If these are among the 5 highest fragments the score is higher.
    
    (Same as in positive ionmode.)
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 6
        
        if self.scn.has_chain_combinations(self.rec, head = 10):
            
            self.score += 4
        
        if self.scn.has_chain_combinations(self.rec, head = 6):
            
            self.score += 2


[docs]class TAG_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a TAG.

    **Specimen:**
    
    - STARD11 + 818.7187
    
    **Principle:**
    
    - Combination of fatty acid fragments must match the expected
      carbon count and unsaturation.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 10
        
        if self.scn.has_chain_combinations(self.rec, head = 15):
            
            self.score += 5
        
        if self.scn.has_chain_combinations(self.rec, head = 7):
            
            self.score += 5


[docs]class TAG_Negative(AbstractMS2Identifier):
    """
    Examines if a negative mode MS2 spectrum is a TAG.

    **Specimen:**
    
    - We don't have yet.
    
    **Principle:**
    
    - Combination of fatty acid fragments must match the
      expected carbon count and unsaturation.
    
    (Same as in positive ionmode.)
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.score = 0
        self.max_score = 5
        
        if self.scn.has_chain_combinations(self.rec):
            
            self.score += 5


[docs]class GL_Positive(AbstractMS2Identifier):
    """
    Generic class for identification of glycerolipids.
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
    class_methods = {
        'DGTS': 'dgts',
        'DGCC': 'dgcc',
        'DGTA': 'dgts',
        'DGDG': 'dgdg',
        'MGDG': 'mgdg',
        'SQDG': 'sqdg',
    }
    
[docs]    def confirm_class(self):
        
        if self.rec.hg.main in self.class_methods:
            
            getattr(self, self.class_methods[self.rec.hg.main])()
    
[docs]    def dgts(self):
        
        self.max_score += 30
        
        if self.scn.has_fragment('DGTS [G+TS] (236.1492)'):
            
            self.score += 10
            
            self.score += sum(map(bool,
                (
                    self.scn.has_fragment('DGTS [TS] (144.1019)'),
                    self.scn.has_chain_fragment_type('NL FA-H2O'),
                )
            )) * 10
    
[docs]    def dgcc(self):
        
        self.max_score += 20
        
        if self.scn.has_fragment('PC/SM [Ch+H2O] (104.107)'):
            
            self.score += 10
            
            if self.scn.has_fragment('DGCC [C2+Ch] (132.1388)'):
                
                self.score += 10
    
[docs]    def sqdg(self):
        
        self.max_score += 10
        
        if self.scn.has_fragment('NL [Hexose+SO3+H2O+H] (NL 261.0280)'):
            
            self.score += 10
    
[docs]    def mgdg(self):
        
        self.max_score += 10
        
        if self.scn.has_fragment('[Hexose+H2O-H] (NL 197.07)'):
            
            self.score += 10
    
[docs]    def dgdg(self):
        
        self.max_score += 10
        
        if self.scn.has_fragment('NL [2xHexose+H2O-H] (NL 359.1190)'):
            
            self.score += 10


[docs]class GL_Negative(AbstractMS2Identifier):
    """
    Generic class for identification of glycerolipids.
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score += 10
        
        if self.scn.has_chain_combination(
            record = self.rec,
            chain_param = (
                {'frag_type': {'FA-H', 'FA-'}},
            )
        ):
            
            self.score += 10

#
# Glycerophospholipids
#

[docs]class PE_Negative(AbstractMS2Identifier):
    """
    Examines if a negative mode MS2 spectrum is Phosphatidylethanolamine.

    **Specimen:**
    
    - GM2A - 714.507 and 716.523
    
    **Principle:**
    
    - The most abundant fragment is a fatty acid [M-H]- ion.
    - 140.0118 PE headgroup must be present.
    - Other headgroup ions 196.0380 and 178.0275 add to the score.
    - Lyso-PE and [M-H-CO2]- fatty acid fragments complementing the
      highest [M-H]- fatty acid increase the score.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score += 11
        
        if (
            self.scn.chain_fragment_type_is(
                i = 0,
                chain_type = 'FA',
                frag_type  = 'FA-H'
            ) and
            self.scn.has_fragment('PE [P+E] (140.0118)')
        ):
            
            self.score += 5
            
            self.score += sum(map(bool, (
                self.scn.has_fragment('PE [G+P+E-H2O] (196.0380)'),
                self.scn.has_fragment('PE [G+P+E] (178.0275)'),
            ))) * 3
            
            # by default this returns max 6
            self.matching_chain_combinations(
                {'frag_type': 'FA-H'},
                {'frag_type': {
                        'LysoPE',
                        'LysoPEAlkyl',
                        'LysoPEAlkyl-H2O',
                        'FA-H2O-H'
                    }
                }
            )


[docs]class PE_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a
    Phosphatidylethanolamine.

    **Specimen:**
    
    - in vivo BPI + 718.536
    - Lyso-PE: in vitro FABP1 + 454.29
    
    **Principle:**
    
    - The PE headgroup neutral loss 141.0191 has the highest intensity.
    - If it is a Lyso-PE score will be zero.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 20
        
        if self.scn.has_fragment('NL PE [P+E] (NL 141.0191)'):
            
            if self.check_lyso():
                
                return
            
            if self.scn.has_fragment('PE [P+E] (142.0264)'):
                
                self.score += 5
            
            self.score += 15


[docs]class LysoPE_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a
    Lysophosphatidylethanolamine.

    **Specimen:**
    
    - in vitro FABP1 + 454.29
    
    **Principle:**
    
    - The PE headgroup neutral loss 141.0191 has the highest intensity.
    - A fatty acid-glycerol fragment should match the carbon count and
      unsaturation of the whole molecule.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 15
        
        if self.scn.has_fragment('NL PE [P+E] (NL 141.0191)'):
            
            self.score = 5
            
            if self.scn.has_fragment('PE [P+E] (142.0264)'):
                
                self.score += 5
            
            self.scn.build_chain_list()
            
            if (
                len(self.scn.chain_list) and
                self.scn.chain_fragment_type_is(
                    self.scn.chain_list[0].i,
                    frag_type = 'FA+Glycerol-OH',
                    c = self.rec.chainsum.c,
                    u = self.rec.chainsum.u,
                )
            ):
                
                self.score += 5


[docs]class PC_Negative(AbstractMS2Identifier):
    """
    Examines if a negative mode MS2 spectrum is a Phosphatidylcholine.

    **Specimen:**
    
    - BPI - 804.57 and 776.545
    
    **Principle:**
    
    - 168.0431 phosphate+choline-CH3 fragment must be present.
    - The highest abundant fragment must be a fatty acid [M-H]- fragment.
    - Lyso-PC fragments complementing the highest [M-H]- fatty acid
      increase the score.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 17
        
        if (
            self.scn.chain_fragment_type_is(
                i = 0,
                chain_type = 'FA',
                frag_type = 'FA-H'
            ) and
            self.scn.has_fragment('PC/SM PO4+choline-CH3 (168.0431)')
        ):
            
            self.score += 5
            
            self.score += sum(map(bool, (
                self.scn.has_fragment('PE [G+P+E-H2O] (196.0380)'),
                self.scn.has_fragment('PE [G+P+E] (178.0275)'),
            ))) * 3
            
            self.matching_chain_combinations(
                {'frag_type': 'FA-H'},
                {'frag_type': 'LysoPC'},
                score_method = lambda ccomb: ((ccomb > 1) * 6, 6),
            )


[docs]class PC_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a Phosphatidylcholine.

    **Specimen:**
    
    - BPI + 786.607
    
    **Principle:**
    
    - The most abundant fragment must be choline+phosphate 184.0733.
    - The 86.0964 ethyl-trimetylammonium must be present.
    - The most abundant fatty acid can not have the same carbon count
      and unsaturation as the whole molecule (then it is Lyso-PC).
    - Fragments 104.1069, 124.9998, 60.0808 and 58.0651 increase the
      score.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = False,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 13
        
        if (
            self.scn.fragment_percent_of_most_abundant(
                'PC/SM [P+Ch] (184.0733)', 10.0
            ) and
            self.scn.has_fragment('PC/SM [Ch] (86.096)')
        ):
            
            if self.check_lyso():
                
                return
            
            self.score += 5
            
            self.score += sum(map(bool, (
                self.scn.has_fragment('PC/SM [Ch+H2O] (104.107)'),
                self.scn.has_fragment('PC/SM [P+Et] (124.9998)'),
                self.scn.has_fragment('PC/SM [N+3xCH3] (60.0808)'),
                self.scn.has_fragment('PC/SM [Ch-Et] (58.0651)'),
            ))) * 2


[docs]class LysoPC_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a Lysophosphatidylcholine.
    
    **Specimen:**
    
    - in vitro FABP1 + 522.36
    
    **Principle:**
    
    - Choline-phosphate 184.0733, ethyl-trimethylammonium 86.0964 and
      neutral loss 183.0660 must be present.
    - The latter neutral loss corresponds to a fatty acid+glycerol ion.
    - The carbon count and unsaturation of this fragment should match
      that of the whole molecule.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = False,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 15
        
        if (
            self.scn.most_abundant_fragment_is('PC/SM [P+Ch] (184.0733)') and
            self.scn.has_fragment('PC/SM [Ch] (86.096)')
        ):
            
            self.score += 5
            
            if self.scn.has_fragment('NL PC/SM [P+Ch] (NL 183.066)'):
                
                self.score += 5
            
            if self.scn.has_chain_fragment_type(
                frag_type = {'FA+Glycerol-OH', 'NL FA-H2O'},
                c = self.rec.chainsum.c,
                u = self.rec.chainsum.u,
            ):
                
                self.score += 5


[docs]class PI_Negative(AbstractMS2Identifier):
    """
    Examines if a negative MS2 spectrum is Phosphatidylinositol.

    **Specimen:**
    
    - GM2A - 835.52
    
    **Principle:**
    
    - Inositolphosphate-H2O fragment 241.0119, metaphosphate 78.9591 and
      headgroup fragment 152.9958 must be present.
    - Additional headgroup fragments 96.9696, 259.0224 and 297.0381
      increase the score.
    - Presence of Lyso-PI fragments complementing other [M-H]- fatty
      acid fragments increase the score.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 19
        
        if (
            self.scn.has_fragment('PI [InsP-H2O]- (241.01)') and
            self.scn.has_fragment('PA/PG/PI/PS [G+P] (152.9958)') and
            self.scn.has_fragment('Cer1P/PIP/PL metaphosphate (78.9591)')
        ):
            
            self.score += 5
            
            self.score += sum(map(bool, (
                self.scn.has_fragment('Cer1P/PI phosphate (96.9696)'),
                self.scn.has_fragment('PI [InsP-H]- (259.02)'),
                self.scn.has_fragment('PI [G+P+I] (297.04)'),
                self.scn.has_fragment('PI [InsP-2H2O]- (223.00)'),
            ))) * 2
            
            self.matching_chain_combinations(
                {'frag_type': 'FA-H'},
                {'frag_type': {
                        'LysoPI',
                        'LysoPI-H2O',
                    }
                },
                score_method = lambda ccomb: (min(ccomb, 2) * 3, 6),
            )


[docs]class PI_Positive(AbstractMS2Identifier):
    """
    Examines if a negative MS2 spectrum is Phosphatidylinositol.

    **Specimen:**
    
    - SEC14L2 + 906.60 and 882.6
    
    **Principle:**
    
    - Combinations of fatty acid fragments must match the expected
      carbon count and unsaturation for PI.
    - Presence of neutral losses 259.0219 and 277.0563 adds to the score.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = True,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 9
        
        if self.scn.has_chain_combinations(self.rec):
            
            self.score += 1
            
            self.score += sum(map(bool, (
                self.scn.has_fragment('NL PI [P+Ins] (NL 259.0219)'),
                self.scn.has_fragment('NL PI [P+Ins+NH3] (NL 277.0563)'),
            ))) * 4


[docs]class PS_Negative(AbstractMS2Identifier):
    """
    Examines if a negative mode MS2 spectrum is a Phosphatidylserine.

    **Specimen:**
    
    - ORP9 - 788.54
    
    **Principle:**
    
    - The most abundant fragment is an [M-H]- fatty acid fragment.
    - Glycerophosphate fragment 152.9958 must be present.
    - Metaphosphate 78.9591 increases the score.
    - Serine-H2O neutral loss 87.0320 adds to the score.
    - Presence of Lyso-PS and Lyso-PA fragments complementing
      the highest [M-H]- fatty acid fragment increase the score.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = True,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 17
        
        if (
            self.scn.has_chain_combinations(self.rec) and
            self.scn.chain_fragment_type_is(
                0, chain_type = 'FA', frag_type = 'FA-H'
            ) and
            self.scn.fragment_among_most_abundant(
                'PA/PG/PI/PS [G+P] (152.9958)', 5
            )
        ):
            
            self.score += 5
            
            self.score += sum(map(bool, (
                self.scn.has_fragment('Cer1P/PIP/PL metaphosphate (78.9591)'),
                self.scn.has_fragment('PS [Ser-H2O] (87.0320)'),
            ))) * 3
            
            self.matching_chain_combinations(
                {'frag_type': 'FA-H'},
                {'frag_type': {'LysoPS', 'LysoPA'}},
                score_method = lambda ccomb: (min(ccomb, 2) * 3, 6),
            )


[docs]class PS_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a Phosphatidylserine.

    **Specimen:**
    
    - BPI + 790.56
    
    **Principle:**
    
    - PS headgroup neutral loss 185.0089 must be the highest intensity.
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = True,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 5
        
        if self.scn.fragment_among_most_abundant('PS [P+S] (NL 185.0089)', 1):
            
            self.score += 5


[docs]class PG_Negative(AbstractMS2Identifier):
    """
    Examines if a negative mode MS2 spectrum is Phosphatidylglycerol.
    The result will be the same as `bmp_neg_1`, as in negative
    mode we do not know a way to distinguish these species.
    

    **Specimen:**
    
    - GM2A - 799.54
    - BPIFB2 - 773.5258 (might be BMP)
    
    **Principle:**
    
    - The most abundant fragment is a fatty acid [M-H]- ion.
    - The 152.9958 glycerophosphate fragment must be present.
    - If Lyso-PG fragment present with carbon count complementing
      the [M-H]- fatty acid score is higher.
    - Presence of 171.0064 headgroup fragment adds to the score.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = True,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 14
        
        if (
            self.scn.has_chain_combinations(self.rec) and
            self.scn.chain_fragment_type_is(
                0, chain_type = 'FA', frag_type = 'FA-H'
            ) and
            self.scn.has_fragment('PA/PG/PI/PS [G+P] (152.9958)')
        ):
            
            self.score += 5
            
            if self.scn.has_fragment('PG headgroup (171.0064)'):
                
                self.score += 3
            
            self.matching_chain_combinations(
                {'frag_type': 'FA-H'},
                {'frag_type': {
                        'LysoPG',
                        'LysoPG-H2O',
                    }
                },
                score_method = lambda ccomb: (min(ccomb, 2) * 3, 6),
            )


[docs]class PG_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a Phosphatidylglycerol.
    At in vivo observed only in standard.
    
    **Principle:**
    
    - The PG headgroup neutral loss (189.0402) is the fragment ion
      with the highest intensity?
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 5
        
        if (
            self.scn.most_abundant_fragment_is(
                'NL PG [G+P+NH3] (NL 189.0402)'
            )
        ):
            
            self.score += 5
        
        # alternative for lyso
        if (
            self.rec.hg.sub == ('Lyso',) and
            self.scn.chain_fragment_type_among_most_abundant(
                n = 1,
                frag_type = {'FA+Glycerol-OH'},
                c = self.rec.chainsum.c,
                u = self.rec.chainsum.u,
            )
        ):
            
            self.score += 5
            self.max_score += 5


[docs]class BMP_Negative(PG_Negative):
    """
    Examines if a negative mode MS2 spectrum is Bismonoacylglycerophosphate.
    The result will be the same as for PG, as in negative
    mode we do not know a way to distinguish these species.
    

    **Specimen:**
    
    - GM2A - 799.54
    - BPIFB2 - 773.5258 (might be BMP)
    
    **Principle:**
    
    - The most abundant fragment is a fatty acid [M-H]- ion.
    - The 152.9958 glycerophosphate fragment must be present.
    - If Lyso-PG fragment present with carbon count complementing
      the [M-H]- fatty acid score is higher.
    - Presence of 171.0064 headgroup fragment adds to the score.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        PG_Negative.__init__(self, record, scan, **kwargs)


[docs]class BMP_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum
    is a Bismonoacylglycerophosphate.

    **Specimen:**
    
    - BPIFB2 + 792.57
    
    **Principle:**
    
    - A glycerol+fatty acid fragment can be found among the 3 highest?
    - The PG headgroup neutral loss (189.0402) is among the fragments?
    - If so, does it have a lower intensity than half of the fatty
      acid+glycerol fragment?
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = True,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 10
        
        if (
            self.scn.has_chain_combinations(self.rec, head = 15) and
            self.scn.chain_fragment_type_among_most_abundant(
                chain_type = 'FA', frag_type = 'FA+Glycerol-OH', n = 3
            )
        ):
            
            self.score += 5
            
            i_hg = self.scn.fragment_by_name('NL PG [G+P+NH3] (NL 189.0402)')
            
            if i_hg is not None:
                
                i_gfa = self.scn.highest_fragment_by_chain_type(
                    head = 4, frag_type = 'FA+Glycerol-OH'
                )
                
                if self.scn.intensities[i_gfa] < self.scn.intensities[i_hg]:
                    
                    self.score = 0
                
                else:
                    
                    self.score += 5


[docs]class PA_Negative(AbstractMS2Identifier):
    """
    Examines if a negative mode MS2 spectrum is Phosphatidylglycerol.
    The result will be the same as `bmp_neg_1`, as in negative
    mode we do not know a way to distinguish these species.
    

    **Specimen:**
    
    - GM2A - 799.54
    - BPIFB2 - 773.5258 (might be BMP)
    
    **Principle:**
    
    - The most abundant fragment is a fatty acid [M-H]- ion.
    - The 152.9958 glycerophosphate fragment must be present.
    - If Lyso-PG fragment present with carbon count complementing
      the [M-H]- fatty acid score is higher.
    - Presence of 171.0064 headgroup fragment adds to the score.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = True,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 25
        
        if (
            self.scn.has_chain_combinations(self.rec) and
            self.scn.chain_fragment_type_is(
                0, chain_type = 'FA', frag_type = 'FA-H'
            ) and
            self.scn.fragment_among_most_abundant(
                'PA/PG/PI/PS [G+P] (152.9958)', 10
            ) and
            self.scn.fragment_among_most_abundant(
                'Cer1P/PIP/PL metaphosphate (78.9591)', 10
            )
        ):
            
            self.score += 20
            
            if self.scn.has_fragment('Cer1P/PI phosphate (96.9696)'):
                
                self.score += 5


[docs]class PA_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a Phosphatidylglycerol.
    At in vivo observed only in standard.
    
    **Principle:**
    
    - The PG headgroup neutral loss (189.0402) is the fragment ion
      with the highest intensity?
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 20
        
        if (
            self.scn.fragment_among_most_abundant(
                'NL [P] (NL 97.9769)', 3, adduct = self.add
            )
        ):
            
            self.score += 10
            
            if self.scn.has_chain_combination(
                self.rec,
                chain_param = ({
                    'frag_type': {
                        'FA+Glycerol-OH',
                        'FA-OH',
                        'FA-H2O-OH',
                    }
                },)
            ):
                
                self.score += 10

#
# Vitamins
#

[docs]class VA_Positive(AbstractMS2Identifier):
    """
    Examines if a positive MS2 spectrum is vitamin A (retinol).

    **Specimen:**
    
    - in vivo RBP1 + 269.2245
    - in vivo RBP4 + 269.2245
    
    **Principle:**
    
    - The most abundant ion is the whole molecule m/z = 269.224.
    - Presence off 3 other ions adds to the score but not
      mandatory: 213.165, 145.1027, 157.1028.
    
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = False,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 8
        
        if self.scn.fragment_among_most_abundant('Retinol I (269.2264)', 3):
            
            self.score += 5
            
            self.score += sum(map(bool, (
                self.scn.has_fragment('Retinol II (213.1637)'),
                self.scn.has_fragment('Retinol III (157.1012)'),
                self.scn.has_fragment('Retinol IV (145.1012)'),
            )))


[docs]class VA_Negative(AbstractMS2Identifier):
    """
    Examines if a positive MS2 spectrum is vitamin A (retinol).

    **Specimen:**
    
    - Standards 141020 negative scan 673
    
    **Principle:**
    
    - 3 fragments seems to always present and be among the most abundant:
      79.055, 119.087 and 255.212; presence of these is the main condition.
    - We also detected 125.061 in our standards which is special because
      contains 2 oxygens; presence of this increase the score.
    """
    
    def __init__(self, record, scan, **kwargs):
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = False,
            **kwargs,
        )
    
[docs]    def confirm_class(self):
        
        self.max_score = 8
        
        if all((
            self.scn.fragment_among_most_abundant(fragname, 7)
            for fragname in (
                'Retinoic acid I (79.0553)',
                'Retinoic acid II (119.0866)',
                'Retinoic acid IV (255.2118)',
            )
        )):
            
            self.score += 5
            
            if self.scn.has_fragment('Retinoic acid III (125.0608)'):
                
                self.score += 3

#
# Sphingolipids
#

[docs]class Cer_Positive(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a ceramide.
    Identifies ceramide varieties including sphingomyeline,
    ceramide-1-phosphare, ceramide-phosphoethanolamine,
    OH-acyl-ceramide, hexosyl and dihexosyl-ceramides,
    and d, t and DH long chain base varieties.
    
    dCer
    ====
    
    **Specimen:**
    
    - Sphingolipid standards scans 
    - SEC14L1 + 538.52
    - STARD11 + 538.526
    
    **Principle:**
    
    - A sphingosine backbone with two H2O loss must be among the
      10 most abundant fragments.
    - Fatty acid [M+H]+ or [M-O]+ fragments or neutral losses
      complementing the one above increase the score.
    - Sphingosine backbone fragments with same carbon count and
      unsaturation with the one with 2 water loss but [Sph-C-2(H2O)]+
      or [Sph-H2O]+ add to the score.
    - The score increases if the following choline fragments
      can not be found: 58.0651, 104.1070, 124.9998 and 184.0733.
    - The presence of the following fragments increase the score:
      60.0444, 70.0651, 82.0651, 96.0808, 107.0730, 121.0886,
      135.1042 and 149.1199.
    
    DHCer
    =====
    
    **Specimen:**
    
    - Standards 180626 m/z 568.56 scan 2367
    
    **Principle:**
    
    - Same pattern as at dCer but from the sphingosine derived fragments
      it becomes clear if it has no unsaturation.
    
    tCer
    ====
    
    **Specimen:**
    
    - Standards 180628 m/z 584.56 scan 2070
    
    **Principle:**
    
    - Has Sph+H2O-H fragment which does not occur at dCer.
    - Sph-H2O-H and Sph-H are much higher abundant than at dCer.
    - Strong fragment at 60.044 which is missing at dCer.
    - H2O and 2xH2O neutral losses are much higher than at dCer.
    
    dCer-2OH-acyl
    =============
    
    **Specimen:**
    
    - Standards 180615 m/z 584.22 scan 2421
    
    **Principle:**
    
    - Same as other ceramides. It's d and DH forms are isobaric with tCer
      but d and t are clearly distinguishable so this does not cause
      confusion.
    
    dCer-1-P
    ========
    
    **Specimen:**
    
    - in vivo GLTPD1 + 728.59
    
    **Principle:**
    
    - A shpingosine backbone with 2 H2O loss must be among the 3 highest
      intensity fragments.
    - Presence of any of the following fragments increases the score:
      82.0651, 115.9875.
      107.0729, 135.1043, 149.1199.
    
    Hex-dCer
    ========
    
    **Specimen:**
    
    - in vivo GLTP + 810.68
    
    **Principle:**
    
    - Hexose fragments 198.0740, 180.0634 and 162.0528 must present.
      These are neutral losses of hexose, hexose-H2O and hexose+H2O
    
    Hex-tCer
    ========
    
    **Specimen:**
    
    - in vivo GLTP + 826.67
    - in vitro GLTP + 826.67, 800.66,
    
    **Principle:**
    
    - Hexose fragments 198.0740, 180.0634 and 162.0528 must present.
      These are neutral losses of hexose, hexose-H2O and hexose+H2O
    
    Hex2-dCer
    =========
    
    **Specimen:**
    
    - in vivo GLTP + 988.73
    
    **Principle:**
    
    - Loss of double hexose with our without extra water or water loss
      are the characteristic fragments of this class.
    
    SHex-dCer
    =========
    
    **Specimen:**
    
    - in vitro 890.64
    
    dSM & DHSM
    ==========
    
    **Specimen:**
    
    - in vivo GLTPD1 + 703.57
    - in vitro GLTPD1 + 813.68
    
    **Principle:**
    
    - The following choline fragments must be present: 60.0808, 86.0964,
      104.1069, 124.9998 and 184.0733. The last one is the most intensive.
    - If 58.0651 can be found it adds to the score.
    - dSM and DHSM are not distinguishable in our settings. Maybe the
      [Sph-2xH2O+H]+ ion (264 @ 18:1) presents more often at d and only
      eventually at DH.
    
    PE-Cer
    ======
    
    We do not have this in standards or in screens so we can not test this.
    Based on Amiar 2016 and Narayanaswamy 2014.
    
    **Principle:**
    
    - Neutral loss of 141.0191 must be present.
    - 142.0264 phospho-ethanolamine fragment and neutral loss of
      phospho-ethanolamine+water might be present.
    - Sph-2xH2O fragment increases the score.
    
    """
    
    class_methods = {
        'SM': 'sm',
        'Sph': 'sph',
    }
    
    subclass_methods = {
        '1P': 'cer1p',
        'Hex': 'hexcer',
        'Hex2': 'hex2cer',
        'SHex': 'shexcer',
        'SHex2': 'shex2cer',
        'PE': 'pe_cer',
        'M2': 'm2',
        'M1': 'm1',
        'M3': 'm3',
        'PC': 'pc',
        'empty': 'cer',
    }
    
    def __init__(self, record, scan, **kwargs):
        
        self.nacyl = record.chainsum is not None and len(record.chainsum) > 1
        self.oacyl = record.chainsum is not None and len(record.chainsum) > 2
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (1,) if self.nacyl else (),
            chain_comb_args = {},
            must_have_chains = True,
            **kwargs,
        )
        
        self.sph_scores     = {}
        self.sph_max_scores = {}
        self.fa_scores      = {}
        self.fa_max_scores  = {}
    
[docs]    def confirm_class(self):
        
        AbstractMS2Identifier.confirm_class(self)
        
        self.max_score += 2
        
        self.score += sum(map(
            self.scn.has_fragment,
            (
                'NL [H2O] (NL 18.0106)',
                'NL [2xH2O] (NL 36.0211)',
            )
        ))
        
        self.confirm_subclass()
    
[docs]    def confirm_chains_explicit(self):
        """
        Most of the time we don't really have fatty acid derived
        fragments from ceramides in positive mode. However certain
        sphingosin base derived fragments correspond to neutral losses
        of fatty acids as the molecule is composed of a sphingosine
        and a fatty acid but this is redundant. Hence we call both
        explicit and implicit identification as practically the it
        is implicit anyways.
        """
        
        for chains in itertools.chain(
            AbstractMS2Identifier.confirm_chains_explicit(self),
            AbstractMS2Identifier.confirm_chains_implicit(self),
        ):
            
            if chains[0][0].attr.sph == self.rec.chainsum.attr[0].sph:
                
                # the sphingosin base and fatty acyl related part of the
                # score is valid only for the current chain combination
                # hence now we add these to the overall score, yield the
                # identification and then subtract them from the score
                sph_score, sph_max_score = self.sphingosine_base(
                    chains[0][0].attr.sph
                )
                
                self.score += sph_score
                self.max_score += sph_max_score
                
                if self.nacyl:
                    
                    fa_score, fa_max_score = self.fatty_acyl(chains[0][1])
                    self.score += fa_score
                    self.max_score += fa_max_score
                
                yield chains
                
                self.score -= sph_score
                self.max_score -= sph_max_score
                
                if self.nacyl:
                    
                    self.score -= fa_score
                    self.max_score -= fa_max_score
    
[docs]    def fatty_acyl(self, fa):
        
        score = 0
        max_score = 0
        
        if fa.attr.oh:
            
            if self.add == '[M-H2O+H]+':
                
                score -= 20
        
        return score, max_score
    
[docs]    def cer(self):
        
        
        score = 0
        max_score = 0
        
        non_hex_score, non_hex_max_score = self.non_hex()
        
        score += non_hex_score
        max_score += non_hex_max_score
        
        return score, max_score
    
[docs]    def non_hex(self):
        
        score = 0
        max_score = 0
        
        score -= sum(map(bool,
            (
                self.scn.has_fragment('NL [Hexose-H2O] (NL 162.05)'),
                self.scn.has_fragment('NL [Hexose] (NL 180.06)'),
                self.scn.has_fragment('NL [Hexose+H2O] (NL 198.07)'),
                self.scn.has_fragment('NL [2xHexose] (NL 342.1162)'),
                self.scn.has_fragment('NL [2xHexose+H2O] (NL 360.1268)'),
                self.scn.has_fragment('NL [2xHexose-H2O] (NL 324.1056)'),
                self.scn.has_fragment('NL [2xHexose+O] (NL 358.1111)'),
                self.scn.has_fragment('NL [2xHexose+C] (NL 372.1268)'),
                self.scn.has_fragment('NL [S] (NL 79.9568)'),
                self.scn.has_fragment('NL [S+H2O] (97.9674)'),
                self.scn.has_fragment('NL [Hexose+SO3] (NL 242.100)'),
                self.scn.has_fragment('NL [Hexose+SO3+H2O] (NL 260.0202)'),
                self.scn.has_fragment('NL [Hexose+SO3+2xH2O] (NL 278.0308)'),
                self.scn.has_fragment('NL [2xHexose+SO3] (NL 404.0625)'),
                self.scn.has_fragment('NL [2xHexose+SO3+H2O] (NL 422.0730)'),
                self.scn.has_fragment('NL [2xHexose+SO3+2xH2O] (NL 440.0836)'),
            )
        )) * 5
        
        return score, max_score
    
[docs]    def sm(self):
        
        score = 0
        max_score = 47
        
        if self.scn.most_abundant_fragment_is('PC/SM [P+Ch] (184.0733)'):
            
            score += 15
            
            score += sum(map(bool,
                (
                    self.scn.has_fragment('PC/SM [N+3xCH3] (60.0808)'),
                    self.scn.has_fragment('PC/SM [Ch] (86.096)'),
                    self.scn.has_fragment('PC/SM [Ch+H2O] (104.107)'),
                    self.scn.has_fragment('PC/SM [P+Et] (124.9998)'),
                    self.scn.has_fragment('PC/SM [Ch-Et] (58.0651)'),
                    self.scn.has_fragment('NL PC/SM [P+Ch] (NL 183.066)'),
                    self.scn.has_fragment('NL SM [P+Ch] (NL 201.0766)'),
                    self.scn.has_fragment('NL SM [N+3xCH3] (77.0841)'),
                    self.scn.has_fragment('NL [H2O] (NL 18.0106)'),
                )
            )) * 3
            
            if self.scn.has_chain_fragment_type(frag_type = 'Sph-2xH2O+H'):
                
                score += 5
            
            self.must_have_chains = False
        
        return score, max_score
    
[docs]    def pc(self):
        """
        Lyso-SM aka Sph-PC.
        
        Scherer 2010, Table 1.
        """
        
        score = 0
        max_score = 15
        
        if self.scn.most_abundant_fragment_is('PC/SM [P+Ch] (184.0733)'):
            
            score += 15
        
        return score, max_score
    
[docs]    def pe_cer(self):
        
        score = 0
        max_score = 30
        
        if self.scn.has_fragment('NL PE [P+E] (NL 141.0191)'):
            
            score += 15
            
            score += sum(map(bool,
                (
                    self.scn.has_fragment('PE [P+E] (142.0264)'),
                    self.scn.has_fragment('NL PE [P+E+H2O] (NL 159.0297)'),
                    self.scn.has_fragment('NL PE [P+E-H2O] (NL 123.0085)'),
                )
            )) * 5
        
        return score, max_score
    
[docs]    def cer1p(self):
        
        score = 0
        max_score = 31
        
        if self.scn.has_fragment('NL [P+H2O] (NL 115.9875)'):
            
            score += 10
        
        if self.scn.chain_among_most_abundant(3, frag_type = 'Sph-2xH2O-H'):
            
            score += 10
        
        if self.scn.has_chain_combination(
            self.rec,
            chain_param = (
                {'frag_type': {
                        'Sph-2xH2O+H',
                        'Sph-H2O+H',
                        'Sph-H2O-H'
                    }
                },
                {'frag_type': 'FA+NH+C2H2-OH'},
            )
        ):
            
            score += 5
        
        score += sum(map(bool,
            (
                self.scn.has_fragment('NL [P] (NL 79.9663)'),
                self.scn.has_fragment('NL [P] (NL 97.9769)'),
            )
        )) * 3
        
        non_hex_score, non_hex_max_score = self.non_hex()
        
        score += non_hex_score
        
        return score, max_score
    
[docs]    def hexcer(self):
        
        score = 0
        max_score = 14
        
        score += sum(map(bool,
            (
                self.scn.has_fragment('NL [Hexose-H2O] (NL 162.05)'),
                self.scn.has_fragment('NL [Hexose] (NL 180.06)'),
                self.scn.has_fragment('NL [Hexose+H2O] (NL 198.07)'),
            )
        )) * 3
        
        if self.hexcer_chain_combination():
            
            score += 5
        
        return score, max_score
    
[docs]    def hex2cer(self):
        
        score = 0
        max_score = 39
        
        score += sum(map(bool,
            (
                self.scn.has_fragment('NL [2xHexose] (NL 342.1162)'),
                self.scn.has_fragment('NL [2xHexose+H2O] (NL 360.1268)'),
            )
        )) * 10
        
        score += sum(map(bool,
            (
                self.scn.has_fragment('NL [2xHexose-H2O] (NL 324.1056)'),
                self.scn.has_fragment('NL [2xHexose+O] (NL 358.1111)'),
                self.scn.has_fragment('NL [2xHexose+C] (NL 372.1268)'),
            )
        )) * 3
        
        if self.hexcer_chain_combination():
            
            score += 10
        
        return score, max_score
    
[docs]    def hexcer_chain_combination(self):
        
        return self.scn.has_chain_combination(
            self.rec,
            chain_param = (
                {'frag_type': {
                        'Sph-2xH2O+H',
                        'Sph-2xH2O-H',
                        'Sph-H2O+H',
                        'Sph-H2O-H',
                        'Sph-C-2xH2O',
                    }
                },
                {'frag_type': {
                        'FA-OH',
                        'NL FA',
                        'FA+NH2-O',
                    }
                },
            )
        )
    
[docs]    def shexcer(self):
        
        score = 0
        max_score = 25
        
        score += sum(map(bool,
            (
                self.scn.has_fragment('NL [S] (NL 79.9568)'),
                self.scn.has_fragment('NL [S+H2O] (97.9674)'),
                self.scn.has_fragment('NL [Hexose+SO3] (NL 242.100)'),
                self.scn.has_fragment('NL [Hexose+SO3+H2O] (NL 260.0202)'),
                self.scn.has_fragment('NL [Hexose+SO3+2xH2O] (NL 278.0308)'),
            )
        )) * 5
        
        return score, max_score
    
[docs]    def shex2cer(self):
        
        score = 0
        max_score = 25
        
        score += sum(map(bool,
            (
                self.scn.has_fragment('NL [S] (NL 79.9568)'),
                self.scn.has_fragment('NL [S+H2O] (97.9674)'),
                self.scn.has_fragment('NL [2xHexose+SO3] (NL 404.0625)'),
                self.scn.has_fragment('NL [2xHexose+SO3+H2O] (NL 422.0730)'),
                self.scn.has_fragment('NL [2xHexose+SO3+2xH2O] (NL 440.0836)'),
            )
        )) * 5
        
        return score, max_score
    
[docs]    def m2(self):
        
        score = 0
        max_score = 46
        
        if self.scn.has_fragment('PC/SM [Ch-Et] (58.0651)'):
            
            score += 10
        
        if (
            self.rec.chainsum.u > 0 and
            self.scn.has_fragment('[C7+NH2] (110.0964)')
        ):
            
            score += 10
        
        score += sum(map(bool, (
            self.scn.has_fragment('[C5+NH2+2H] (84.0808)'),
            self.scn.has_fragment('[C6+NH2] (96.0808)'),
        ))) * 3
        
        if self.scn.has_chain_combination(
                self.rec,
                chain_param = (
                    {
                        'frag_type': {
                            'Sph-2xH2O+CH3',
                            'Sph-O-H2O+CH3+H',
                            'Sph-2xH2O+2xCh3+H',
                            'Sph-H2O+2xCH3+H',
                        }
                    },
                )
            ):
            
            score += 20
        
        return score, max_score
    
[docs]    def m1(self):
        
        score = 0
        max_score = 30
        
        if self.scn.has_fragment('PC/SM [Ch-Et] (58.0651)'):
            
            score += 10
        
        if self.scn.has_chain_combination(
                self.rec,
                head = 10,
                chain_param = (
                    {
                        'frag_type': {
                            'Sph-2xH2O+CH3',
                            'Sph-O-H2O+CH3+H',
                            'Sph-H2O+CH3+H',
                        }
                    },
                )
            ):
            
            score += 20
        
        if self.scn.has_chain_fragment_type(
                frag_type = {'Sph-2xH2O+2xCH3+H', 'Sph-H2O+2xCH3+H'},
                c = self.rec.chainsum.c - 1,
                u = self.rec.chainsum.u,
            ):
            
            score -= 20
        
        return score, max_score
    
[docs]    def m3(self):
        
        score = 0
        max_score = 20
        
        if self.scn.fragment_among_most_abundant(
            3, 'PC/SM [N+3xCH3] (60.0808)'
        ):
            
            score += 20
        
        return score, max_score
    
[docs]    def sph(self):
        
        score = 0
        max_score = 9
        
        score += sum(map(bool,
            (
                self.scn.has_fragment('[C3+NH2] (56.0495)'),
                self.scn.has_fragment('[C2+NH2+O] (60.0444)'),
                self.scn.has_fragment('[C4+NH2+OH] (86.0600)'),
            )
        )) * 3
        
        return score, max_score
    
[docs]    def sphingosine_base(self, sph):
        
        if sph not in self.sph_scores:
            
            method = 'sphingosine_%s' % sph.lower()
            
            self.sph_scores[sph], self.sph_max_scores[sph] = (
                getattr(self, method)() if hasattr(self, method) else (0, 0)
            )
        
        return self.sph_scores[sph], self.sph_max_scores[sph]
    
[docs]    def sphingosine_d(self):
        
        score = 0
        max_score = 20
        
        if self.rec.chainsum and self.rec.chainsum.u == 0:
            
            return score, max_score
        
        if (
            self.nacyl and self.scn.chain_fragment_type_is(
                0,
                frag_type = 'Sph-2xH2O+H',
                u = (False, {0}),
            )
        ) or (
            not self.nacyl and self.scn.chain_fragment_type_is(
                0,
                frag_type = 'Sph-H2O+H',
                u = (False, {0}),
            )
        ) or (
            self.rec.hg.main == 'SM' and self.scn.chain_among_most_abundant(
                5,
                frag_type = 'Sph-2xH2O+H',
                u = (False, {0}),
                skip_non_chains = True,
            )
        ):
            
            score += 6
            
            score += sum(map(bool,
                (
                    not self.scn.has_fragment('[C2+NH2+O] (60.0444)'),
                    self.scn.has_fragment('NL [C+2xH2O] (NL 48.0211)')
                )
            )) * 2
            
            if (
                self.scn.chain_fragment_type_among_most_abundant(
                    4, frag_type = 'Sph-H2O+H', u = (False, {0})
                ) and
                self.scn.chain_fragment_type_among_most_abundant(
                    4, frag_type = 'Sph-C-O-H2O-H', u = (False, {0})
                ) and
                self.scn.chain_fragment_type_among_most_abundant(
                    4, frag_type = 'Sph-2xH2O+H', u = (False, {0})
                )
            ):
                score += 10
        
        return score, max_score
    
[docs]    def sphingosine_dh(self):
        
        score = 0
        max_score = 20
        
        score += sum(map(bool,
            (
                self.scn.has_fragment('[C2+NH2+O] (60.0444)'),
                not self.scn.has_fragment('NL [C+2xH2O] (NL 48.0211)')
            )
        ))
        
        score += sum(map(bool,
            (
                self.scn.chain_fragment_type_among_most_abundant(
                    5, frag_type = 'Sph-H2O+H', u = 0
                ),
                self.scn.chain_fragment_type_among_most_abundant(
                    5, frag_type = 'FA+NH2-O', u = 0
                ),
                self.scn.chain_fragment_type_among_most_abundant(
                    10, frag_type = 'Sph-2xH2O+H', u = 0
                ),
                self.scn.has_chain_fragment_type(
                    frag_type = 'Sph-C-O-H2O-H', u = 0
                ),
                self.scn.has_chain_fragment_type(
                    frag_type = 'Sph+H', u = 0
                ),
                self.scn.has_chain_fragment_type(
                    frag_type = 'Sph-C-O-H2O-NH', u = 0
                )
            )
        )) * 3
        
        return score, max_score
    
[docs]    def sphingosine_t(self):
        
        score = 0
        max_score = 20
        
        if all((
            self.scn.chain_fragment_type_among_most_abundant(
                5, frag_type = 'Sph-H2O-H',
            ),
            self.scn.chain_fragment_type_among_most_abundant(
                10, frag_type = 'Sph-2xH2O-H',
            ),
            (
                self.scn.fragment_among_most_abundant(
                    '[C2+NH2+O] (60.0444)'
                ) or
                self.rec.hg.sub
            )
        )):
            
            score = 9
            
            score += sum(map(bool,
                (
                    not self.scn.has_fragment('NL [C+2xH2O] (NL 48.0211)'),
                    self.scn.has_fragment('NL [3xH2O] (NL 54.0317)')
                )
            ))
            
            score += sum(map(bool,
                (
                    self.scn.has_chain_fragment_type(
                        frag_type = 'Sph-C-2xH2O',
                    ),
                    self.scn.has_chain_fragment_type(
                        frag_type = 'Sph+H2O-H',
                    ),
                    self.scn.chain_fragment_type_among_most_abundant(
                        5, frag_type = 'Sph-H',
                    ),
                )
            )) * 3
        
        return score, max_score
    
[docs]    def sphingosine_k(self):
        
        score = 0
        max_score = 39
        
        if self.scn.has_chain_fragment_type(frag_type = 'Sph-NH2-H2O-2H'):
            
            score += 15
        
        score += sum(map(bool,
            (
                self.scn.has_fragment('[C2+NH2+O] (60.0444)'),
                self.scn.has_fragment('[C4+NH2+OH] (86.0600)'),
                self.scn.has_fragment('[C6+OH] (99.0804)'),
                self.scn.has_fragment('[C3+NH2] (56.0495)'),
            )
        )) * 3
        
        score += sum(map(bool,
            (
                self.scn.has_chain_fragment_type(
                    frag_type = 'Sph-C-2xH2O',
                ),
                self.scn.has_chain_fragment_type(
                    frag_type = 'Sph-H2O-H',
                ),
                self.scn.has_chain_fragment_type(
                    frag_type = 'Sph-H',
                ),
                self.scn.chain_fragment_type_among_most_abundant(
                    5, frag_type = 'Sph-H',
                ),
            )
        )) * 3
        
        return score, max_score


[docs]class Cer_Negative(AbstractMS2Identifier):
    """
    Examines if a positive mode MS2 spectrum is a ceramide.
    Identifies ceramide varieties including sphingomyeline,
    ceramide-1-phosphare, ceramide-phosphoethanolamine,
    OH-acyl-ceramide, hexosyl and dihexosyl-ceramides,
    and d, t and DH long chain base varieties.
    
    dCer
    ====
    
    **Specimen:**
    
    - in vivo SEC14L1 583, 554, 580 (formiate adduct)
    - in vivo STARD11 583, 554 (formiate adduct)
    - standards
    
    DHCer
    =====
    
    **Specimen:**
    
    - standards
    
    tCer
    ====
    
    **Specimen:**
    
    - standards
    
    """
    
    class_methods = {
        'Cer': 'cer',
        'SM':  'sm',
    }
    
    subclass_methods = {
        '1P': 'cer1p',
        'Hex': 'hexcer',
    }
    
    def __init__(self, record, scan, **kwargs):
        
        self.nacyl = record.chainsum is not None and len(record.chainsum) > 1
        self.oacyl = record.chainsum is not None and len(record.chainsum) > 2
        
        AbstractMS2Identifier.__init__(
            self,
            record,
            scan,
            missing_chains = (),
            chain_comb_args = {},
            must_have_chains = True,
            **kwargs,
        )
        
        self.sph_scores     = {}
        self.fa_scores      = {}
        self.sph_max_scores = {}
        self.fa_max_scores  = {}
    
[docs]    def confirm_class(self):
        
        AbstractMS2Identifier.confirm_class(self)
        
        self.confirm_subclass()
    
[docs]    def confirm_chains_explicit(self):
        
        for chains in itertools.chain(
            AbstractMS2Identifier.confirm_chains_explicit(self),
            AbstractMS2Identifier.confirm_chains_implicit(self),
        ):
            
            if chains[0][0].attr.sph == self.rec.chainsum.attr[0].sph:
                
                # the sphingosin base and fatty acyl related part of the
                # score is valid only for the current chain combination
                # hence now we add these to the overall score, yield the
                # identification and then subtract them from the score
                sph_score, sph_max_score = self.sphingosine_base(
                    chains[0][0].attr.sph
                )
                self.score += sph_score
                self.max_score += sph_max_score
                
                if self.nacyl:
                    
                    fa_score, fa_max_score = self.fatty_acyl(chains[0][1])
                    self.score += fa_score
                    self.max_score += fa_max_score
                
                yield chains
                
                self.score -= sph_score
                self.max_score -= sph_max_score
                
                if self.nacyl:
                    
                    self.score -= fa_score
                    self.max_score -= fa_max_score
    
[docs]    def fatty_acyl(self, fa):
        
        score = 0
        max_score = 0
        
        if len(fa.attr.oh) == 1:
            
            max_score = 30
            
            if self.scn.has_chain_combination(
                self.rec,
                head = 20, # to exclude tCer
                chain_param = (
                    {
                        'frag_type': {
                            'Sph-H', # b1
                            'Sph-C2H4-NH2-H2O', # b5
                        }
                    },
                    {
                        'frag_type': {
                            'FA+C2+NH2+O',  # a5 @ hydroxyacyl
                            'FA+CH2+NH2+O', # a1 @ hydroxyacyl
                        }
                    }
                )
            ):
                
                score += 30
        
        return score, max_score
    
[docs]    def cer(self):
        
        max_score = 23
        
        cer_nl = (
            'NL H2O (NL 18.0106)', # Hsu c1
            'NL 2xH2O (NL 36.0211)', # Hsu c4
            'NL C+H2O (NL 30.0106)', # Hsu c2
            'NL CH2+H2O (NL 32.0262)', # Hsu c3
            'NL C+2xH2O (NL 48.0211)', # Hsu c5
            'NL C+3xH2O (66.0455)', # Hsu c6
        )
        
        score = sum(
            self.scn.has_fragment(frag_name, adduct = self.add)
            for frag_name in cer_nl
        ) * 3
        
        if self.scn.has_chain_combinations(self.rec, adduct = self.add):
            
            score += 5
        
        return score, max_score
    
[docs]    def sphingosine_base(self, sph):
        
        if sph not in self.sph_scores:
            
            method = 'sphingosine_%s' % sph.lower()
            
            self.sph_scores[sph], self.sph_max_scores[sph] = (
                getattr(self, method)() if hasattr(self, method) else (0, 0)
            )
        
        return self.sph_scores[sph], self.sph_max_scores[sph]
    
[docs]    def sphingosine_d_dh(self):
        
        score = 0
        max_score = 20
        
        if self.scn.has_chain_combination(
            self.rec,
            chain_param = (
                {
                    'frag_type': {
                        'Sph-H', # b1
                        'Sph-C2H4-3H', # b2
                        'Sph-CH2-H2O-H', # b3
                        'Sph-H2O-NH2-2H', # b4
                        'Sph-C2H4-NH2-H2O', # b5
                    }
                },
                {
                    'frag_type': {
                        'FA+C2+NH2', # a2
                        'FA+C2+NH2-O', # a3
                    }
                }
            )
        ):
            
            score += 20
        
        return score, max_score
    
[docs]    def sphingosine_d(self):
        
        score = 0
        max_score = 20
        
        d_dh_score, d_dh_max_score = self.sphingosine_d_dh()
        
        score += d_dh_score
        max_score += d_dh_max_score
        
        if self.scn.has_fragment('NL C+H2O (NL 30.0106)', adduct = self.add):
            
            score += 20
        
        return score, max_score
    
[docs]    def sphingosine_dh(self):
        
        score = 0
        max_score = -20
        
        d_dh_score, d_dh_max_score = self.sphingosine_d_dh()
        
        score += d_dh_score
        max_score += d_dh_max_score
        
        if self.scn.has_fragment('NL C+H2O (NL 30.0106)', adduct = self.add):
            
            score -= 20
        
        return score, max_score
    
[docs]    def sphingosine_t(self):
        
        score = 0
        max_score = 28
        
        if self.scn.has_fragment('NL C+3xH2O (66.0455)', adduct = self.add):
            
            score += 5
        
        if self.scn.has_fragment('HexCer identity II'):
            
            score += 3
        
        if self.scn.has_chain_combination(
            self.rec,
            chain_param = (
                {
                    'frag_type': {
                        'Sph-CH2-NH2-4H', # b6
                    }
                },
                {
                    'frag_type': {
                        'FA+C2H2+NH2', # a1
                        'FA+C3H2+NH2', # a10
                    }
                }
            )
        ):
            
            score += 20
        
        # differentiate from hydroxyacyl-dCer
        if self.scn.chain_percent_of_most_abundant(
            frag_type = {'FA+C2H2+NH2+O', 'FA+C2+NH2+O'},
            percent = 5.0,
        ):
            
            score -= 10
        
        return score, max_score
    
[docs]    def cer1p(self):
        
        score = 0
        max_score = 70
        
        if any(map(bool, (
            self.scn.has_fragment('Cer1P/PIP/PL metaphosphate (78.9591)'),
            self.scn.has_fragment('Cer1P/PI phosphate (96.9696)'),
        ))):
            
            score += 20
            
            if self.scn.has_fragment(
                'NL H2O (NL 18.0106)', adduct = self.add
            ):
                
                score += 10
            
            if self.scn.has_chain_fragment_type(
                frag_type = {'NLFA_pH2O', 'NLFA_p2xH2O'},
                adduct = self.add
            ):
                
                score += 10
            
            self.must_have_chains = False
        
        if (
            self.rec.hg.main == 'Sph' and
            self.scn.fragment_among_most_abundant(
                'Cer1P/PIP/PL metaphosphate (78.9591)', 3
            )
        ):
            
            score += 20
            self.must_have_chains = False
            
            if self.scn.has_fragment('Cer1P/PI phosphate (96.9696)'):
                
                score += 10
        
        return score, max_score
    
[docs]    def sm(self):
        
        score = 0
        max_score = 45
        
        if self.scn.fragment_among_most_abundant(
            'NL CH2 (NL 14.0157)', 3, adduct = self.add
        ) and self.scn.fragment_among_most_abundant(
            'PC/SM PO4+choline-CH3 (168.0431)', 5
        ):
            
            score += 30
            
            score += sum(map(bool, (
                self.scn.has_fragment(
                    'Cer1P/PIP/PL metaphosphate (78.9591)'
                ),
                self.scn.has_fragment(
                    'NL choline+H2O', adduct = self.add
                ),
                self.scn.has_fragment(
                    'NL choline+H2O-CH3', adduct = self.add
                ),
            ))) * 5
            
            self.must_have_chains = False
        
        return score, max_score
    
[docs]    def hexcer(self):
        
        score = 0
        max_score = 90
        
        self.score += sum(map(bool, (
            self.scn.fragment_among_most_abundant('HexCer identity I', 10),
            self.scn.fragment_among_most_abundant('HexCer identity II', 10),
            self.scn.fragment_among_most_abundant('HexCer identity III', 10),
            self.scn.has_fragment('[Hexose] (179.0561)'),
            self.scn.has_fragment('[Hexose-H2O] (161.0455)'),
            self.scn.has_fragment('[Hexose-HCHO] (149.0455)'),
            self.scn.has_fragment('NL hexose (162.053)'),
            self.scn.has_fragment('NL hexose+H2O (180.063)'),
        ))) * 10
        
        if self.scn.has_chain_combinations(self.rec):
            
            score += 10
        
        return score, max_score
    
[docs]    def hex2cer(self):
        
        score = 0
        max_score = 140
        
        self.score += sum(map(bool, (
            self.scn.has_fragment('HexCer identity I'),
            self.scn.has_fragment('HexCer identity II'),
            self.scn.has_fragment('HexCer identity III'),
            self.scn.has_fragment('[Hexose] (179.0561)'),
            self.scn.has_fragment('[Hexose-H2O] (161.0455)'),
            self.scn.has_fragment('[Hexose-HCHO] (149.0455)'),
            self.scn.has_fragment('NL hexose (162.053)'),
            self.scn.has_fragment('NL hexose+H2O (180.063)'),
            self.scn.has_fragment('[2xHexose-HCHO] (311.0984)'),
            self.scn.has_fragment('[2xHexose-H2O] (323.0984)'),
            self.scn.has_fragment('[2xHexose] (341.1089)'),
            self.scn.has_fragment('NL 2xHexose (324.106)'),
            self.scn.has_fragment('NL 2xHexose+H2O (342.1162)'),
        ))) * 10
        
        if self.scn.has_chain_combinations(self.rec):
            
            score += 10
        
        return score, max_score
    
[docs]    def shexcer(self):
        
        score = 0
        max_score = 100
        
        self.missing_chains = (1,)
        
        if self.scn.has_fragment('Sulphate (96.9601)'):
            
            score += 20
        
        self.score += sum(map(bool, (
            self.scn.has_fragment('[Sulfohexose] (259.0129)'),
            self.scn.has_fragment('[Sulfohexose] (256.9972)'),
            self.scn.has_fragment('[Sulfohexose-H2O] (241.0024)'),
            self.scn.has_fragment('[Sulfohexose+Et+N] (300.0395)'),
        ))) * 10
        
        if self.scn.has_chain_fragment_type(
            frag_type = {
                'Sph+C6O5H8+SO3+H2O',
                'Sph+C6O5H8+SO3+CO+H2O',
            }
        ):
            
            score += 20
            
            if self.scn.has_chain_combination(
                self.rec,
                chain_param = (
                    {
                        'frag_type': {
                            'Sph+C6O5H8+SO3',
                            'Sph+C6O5H8+SO3+H2O',
                            'Sph+C6O5H8+SO3+CO+H2O',
                        }
                    },
                    {
                        'frag_type': {
                            'NLFA',
                            'NLFA_mH2O',
                        }
                    }
                )
            ):
                
                score += 20
        
        return score, max_score
    
[docs]    def shex2cer(self):
        
        score = 0
        max_score = 140
        
        self.missing_chains = (1,)
        
        if self.scn.has_fragment('Sulphate (96.9601)'):
            
            score += 20
        
        score += sum(map(bool, (
            self.scn.has_fragment('[Sulfohexose] (259.0129)'),
            self.scn.has_fragment('[Sulfohexose] (256.9972)'),
            self.scn.has_fragment('[Sulfohexose-H2O] (241.0024)'),
            self.scn.has_fragment('[Sulfohexose+Et+N] (300.0395)'),
            self.scn.has_fragment('[2xHexose-H2O+SO3] (403.0552)'),
            self.scn.has_fragment('[2xHexose+SO3] (419.0501)'),
            self.scn.has_fragment('[2xHexose+SO3] (421.0658)'),
            self.scn.has_fragment('[2xHexose+SO3+Et+N] (462.0923)'),
        ))) * 10
        
        if self.scn.has_chain_fragment_type(
            frag_type = {
                'Sph+C12O10H18+SO3',
                'Sph+C12O10H18+SO3+H2O',
                'Sph+C12O10H18+SO3+CO+H2O',
            }
        ):
            
            score += 20
            
            if self.scn.has_chain_combination(
                self.rec,
                chain_param = (
                    {
                        'frag_type': {
                            'Sph+C12O10H18+SO3',
                            'Sph+C12O10H18+SO3+H2O',
                            'Sph+C12O10H18+SO3+CO+H2O',
                        }
                    },
                    {
                        'frag_type': {
                            'NLFA',
                            'NLFA_mH2O',
                        }
                    }
                )
            ):
                
                score += 20
        
        return score, max_score
    
[docs]    def pe_cer(self):
        
        score = 0
        max_score = 30
        
        score += sum(map(bool, (
            self.scn.has_fragment('PE [P+E] (140.0118)'),
            self.scn.has_fragment('NL PE [P+E] (141.0191)'),
            self.scn.has_fragment('PE [P+E-H2O] (122.0013)'),
        ))) * 10
        
        return score, max_score

#
# Scan.identify() dispatches identification methods as below
#

idmethods = {
    'neg': {
        lipproc.Headgroup(main = 'FA'):  FA_Negative,
        lipproc.Headgroup(main = 'DAG'): DAG_Negative,
        lipproc.Headgroup(main = 'TAG'): TAG_Negative,
        lipproc.Headgroup(main = 'DGTA'): GL_Negative,
        lipproc.Headgroup(main = 'DGTS'): GL_Negative,
        lipproc.Headgroup(main = 'DGCC'): GL_Negative,
        lipproc.Headgroup(main = 'SQDG'): GL_Negative,
        lipproc.Headgroup(main = 'MGDG'): GL_Negative,
        lipproc.Headgroup(main = 'DGDG'): GL_Negative,
        lipproc.Headgroup(main = 'DGTA', sub = ('Lyso',)): GL_Negative,
        lipproc.Headgroup(main = 'DGTS', sub = ('Lyso',)): GL_Negative,
        lipproc.Headgroup(main = 'DGCC', sub = ('Lyso',)): GL_Negative,
        lipproc.Headgroup(main = 'SQDG', sub = ('Lyso',)): GL_Negative,
        lipproc.Headgroup(main = 'MGDG', sub = ('Lyso',)): GL_Negative,
        lipproc.Headgroup(main = 'DGDG', sub = ('Lyso',)): GL_Negative,
        lipproc.Headgroup(main = 'PE'):  PE_Negative,
        lipproc.Headgroup(main = 'PE', sub = ('Lyso',)): PE_Negative,
        lipproc.Headgroup(main = 'PC'):  PC_Negative,
        lipproc.Headgroup(main = 'PC', sub = ('Lyso',)): PC_Negative,
        lipproc.Headgroup(main = 'PI'):  PI_Negative,
        lipproc.Headgroup(main = 'PI', sub = ('Lyso',)):  PI_Negative,
        lipproc.Headgroup(main = 'PS'):  PS_Negative,
        lipproc.Headgroup(main = 'PS', sub = ('Lyso',)): PS_Negative,
        lipproc.Headgroup(main = 'PG'):  PG_Negative,
        lipproc.Headgroup(main = 'PG', sub = ('Lyso',)):  PG_Negative,
        lipproc.Headgroup(main = 'BMP'): BMP_Negative,
        lipproc.Headgroup(main = 'PA'):  PA_Negative,
        lipproc.Headgroup(main = 'PA', sub = ('Lyso',)):  PA_Negative,
        lipproc.Headgroup(main = 'VA'): VA_Negative,
        lipproc.Headgroup(main = 'Cer'): Cer_Negative,
        lipproc.Headgroup(main = 'Cer', sub = ('1P',)): Cer_Negative,
        lipproc.Headgroup(main = 'SM'): Cer_Negative,
        lipproc.Headgroup(main = 'Cer', sub = ('Hex',)): Cer_Negative,
        lipproc.Headgroup(main = 'Cer', sub = ('Hex2',)): Cer_Negative,
        lipproc.Headgroup(main = 'Cer', sub = ('SHex',)): Cer_Negative,
        lipproc.Headgroup(main = 'Cer', sub = ('SHex2',)): Cer_Negative,
        lipproc.Headgroup(main = 'Cer', sub = ('PE',)): Cer_Negative,
        lipproc.Headgroup(main = 'Sph'): Cer_Negative,
        lipproc.Headgroup(main = 'Sph', sub = ('1P',)): Cer_Negative,
    },
    'pos': {
        lipproc.Headgroup(main = 'FA'):  FA_Positive,
        lipproc.Headgroup(main = 'DAG'): DAG_Positive,
        lipproc.Headgroup(main = 'DGTA'): GL_Positive,
        lipproc.Headgroup(main = 'DGTS'): GL_Positive,
        lipproc.Headgroup(main = 'DGCC'): GL_Positive,
        lipproc.Headgroup(main = 'SQDG'): GL_Positive,
        lipproc.Headgroup(main = 'MGDG'): GL_Positive,
        lipproc.Headgroup(main = 'DGDG'): GL_Positive,
        lipproc.Headgroup(main = 'DGTA', sub = ('Lyso',)): GL_Positive,
        lipproc.Headgroup(main = 'DGTS', sub = ('Lyso',)): GL_Positive,
        lipproc.Headgroup(main = 'DGCC', sub = ('Lyso',)): GL_Positive,
        lipproc.Headgroup(main = 'SQDG', sub = ('Lyso',)): GL_Positive,
        lipproc.Headgroup(main = 'MGDG', sub = ('Lyso',)): GL_Positive,
        lipproc.Headgroup(main = 'DGDG', sub = ('Lyso',)): GL_Positive,
        lipproc.Headgroup(main = 'TAG'): TAG_Positive,
        lipproc.Headgroup(main = 'PE'):  PE_Positive,
        lipproc.Headgroup(main = 'PE', sub = ('Lyso',)):  LysoPE_Positive,
        lipproc.Headgroup(main = 'PC'):  PC_Positive,
        lipproc.Headgroup(main = 'PC', sub = ('Lyso',)):  LysoPC_Positive,
        lipproc.Headgroup(main = 'PI'):  PI_Positive,
        lipproc.Headgroup(main = 'PI', sub = ('Lyso',)):  PI_Positive,
        lipproc.Headgroup(main = 'PS'):  PS_Positive,
        lipproc.Headgroup(main = 'PS', sub = ('Lyso',)):  PS_Positive,
        lipproc.Headgroup(main = 'PG'):  PG_Positive,
        lipproc.Headgroup(main = 'PG', sub = ('Lyso',)):  PG_Positive,
        lipproc.Headgroup(main = 'BMP'): BMP_Positive,
        lipproc.Headgroup(main = 'PA'):  PA_Positive,
        lipproc.Headgroup(main = 'PA', sub = ('Lyso',)):  PA_Positive,
        lipproc.Headgroup(main = 'VA'): VA_Positive,
        lipproc.Headgroup(main = 'Cer'): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('1P',)): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('Hex',)): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('Hex2',)): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('SHex',)): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('SHex2',)): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('PE',)): Cer_Positive,
        lipproc.Headgroup(main = 'SM'): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('1P', 'Lyso')): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('Hex', 'Lyso')): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('Hex2', 'Lyso')): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('SHex', 'Lyso')): Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('SHex2', 'Lyso')):
            Cer_Positive,
        lipproc.Headgroup(main = 'Cer', sub = ('PE', 'Lyso')): Cer_Positive,
        lipproc.Headgroup(main = 'SM',  sub = ('Lyso',)): Cer_Positive,
        lipproc.Headgroup(main = 'Sph'): Cer_Positive,
        lipproc.Headgroup(main = 'Sph', sub = ('1P',)): Cer_Positive,
        lipproc.Headgroup(main = 'Sph', sub = ('M1',)): Cer_Positive,
        lipproc.Headgroup(main = 'Sph', sub = ('M2',)): Cer_Positive,
        lipproc.Headgroup(main = 'Sph', sub = ('M3',)): Cer_Positive,
    }
}


[docs]class MS2Feature(object):
    
    scan_methods = {
        'mgf':  'mgf_iterscans',
        'mzml': 'mzml_iterscans',
    }
    
    def __init__(
            self,
            mz,
            ionmode,
            resources,
            rt,
            ms1_records = None,
            rt_range = .5,
            check_rt = True,
        ):
        """
        Collects the MS2 scans from the provided resources for a single
        feature. Calls identification methods on all scans collected.
        
        :param float mz:
            m/z value of the precursor ion.
        :param str ionmode:
            Ion mode of the experiment. Either ``pos`` or ``neg``.
        :param dict resources:
            ``dict`` of MS2 scan resources. These are either ``mgf.MgfReader``
            objects or paths to MGF files. Later more resource types
            will be available, for example MzML format. Keys of the ``dict``
            are used as sample labels. Thes can be strings or tuples.
        :param dict ms1_records:
            A data structure resulted by ``moldb.adduct_lookup``. If ``None``
            the lookup will be done here.
        :param float rt_range:
            If a single retention time value provided this is the largest
            accepted difference between an MS2 scan's RT and the precursor's
            RT. E.g. if ``rt = 8.3`` and ``rt_range = 0.5``, scans between
            7.8 and 8.8 will be considered. If a tuple of floats provided
            for RT, scans between these two values will be considered.
        :param bool check_rt:
            Check if the retention time of the scan is enough close to the
            precursor's RT. If ``False``, scans will be matched only by the
            m/z value of the precursor and scans with any large RT difference
            will be analysed.
        """
        
        self.mz = mz
        self.ionmode = ionmode
        self.ms1_records = ms1_records or moldb.adduct_lookup(mz, ionmode)
        self.resources = resources
        self.rt = (rt - rt_range, rt + rt_range) if type(rt) is float else rt
        self.rtmean = sum(self.rt) / 2.0
        self.rt_range = rt_range
        self.check_rt = check_rt
    
[docs]    def main(self):
        
        self.ms1_lookup()
        self.build_scans()
        self.identify()
    
[docs]    def iterscans(self):
        
        for sample_id, resource in iteritems(self.resources):
            
            res_type = self.guess_resouce_type(resource)
            
            if res_type not in self.scan_methods:
                
                raise ValueError(
                    'Unknown MS2 resource type: %s' % str(resource)
                )
            
            scan_method = getattr(self, self.scan_methods[res_type])
            
            for scan in scan_method(resource, sample_id):
                
                yield scan
    
[docs]    def mgf_iterscans(self, mgf_resource, sample_id = None):
        
        if isinstance(mgf_resource, basestring):
            
            mgffile = mgf.MgfReader(mgfname, charge = None)
            
        elif isinstance(mgf_resource, mgf.MgfReader):
            
            mgffile = mgf_resource
            
        else:
            
            raise ValueError(
                'Mgf files should be lipyd.mgf.MgfReader '
                'instances of file names.'
            )
        
        idx, rtdiff = mgffile.lookup(self.mz, rt = self.rtmean)
        
        for i, rtd in zip(idx, rtdiff):
            
            if self.check_rt:
                
                scan_rt = self.rtmean + rtd
                
                if scan_rt < self.rt[0] or scan_rt > self.rt[1]:
                    
                    continue
            
            sc = mgffile.get_scan(i)
            
            yield Scan(
                mzs = sc[:,0],
                intensities = sc[:,1],
                ionmode = self.ionmode,
                precursor = self.mz,
                ms1_records = self.ms1_records,
                scan_id = mgffile.mgfindex[i,3],
                sample_id = sample_id,
                source = mgffile.fname,
                deltart = rtd,
                rt = mgffile.mgfindex[i,2],
            )
    
[docs]    def mzml_iterscans(self, mzml_resource, sample_id = None):
        
        raise NotImplementedError
    
[docs]    @staticmethod
    def guess_resouce_type(res):
        
        if isinstance(res, basestring) and os.path.exists(res):
            
            if res[-3:].lower() == 'mgf':
                
                return 'mgf'
            
        elif isinstance(res, mgf.MgfReader):
            
            return 'mgf'
    
[docs]    def build_scans(self):
        
        self.scans = np.array(list(self.iterscans()))
        self.deltart = np.array([sc.rt - self.rtmean for sc in self.scans])
        
        rtsort = [
            it[0]
            for it in sorted(
                (it for it in enumerate(self.deltart)),
                key = lambda it: abs(it[1])
            )
        ]
        
        self.scans = self.scans[rtsort]
        self.deltart = self.deltart[rtsort]
    
[docs]    def identify(self):
        
        self.identities = []
        
        for scan in self.scans:
            
            identity = scan.identify()
            
            if identity:
                
                self.identities.append(identity)
    
[docs]    def identity_summary(
            self,
            scores = True,
            drt = True,
            sample_ids = False,
            scan_ids = False,
        ):
        
        identities = set()
        
        for i, scan_i in enumerate(self.identities):
            
            for sum_str, varieties in iteritems(scan_i):
                
                for var in varieties:
                    
                    summary = [var.__str__()]
                    
                    if scores:
                        
                        summary.append(var.score_pct)
                        
                    else:
                        
                        if var.score == 0:
                            
                            continue
                    
                    if drt:
                        
                        summary.append(self.deltart[i])
                    
                    if sample_ids:
                        
                        summary.append(self.scans[i].sample_id)
                    
                    if scan_ids:
                        
                        summary.append(self.scans[i].scan_id)
                    
                    summary.append(sum_str)
                    
                    identities.add(tuple(summary))
        
        return identities
    
[docs]    def ms1_lookup(self):
        
        if self.ms1_records is None:
            
            self.ms1_records = moldb.adduct_lookup(self.mz)

##############################################################################


[docs]class MS2FeatureOld(object):
    """
    Provides additional, more sophisticated methods
    for identification of a single feature.
    
    In the original concept all methods for identification
    based on MS1 and MS2 took place in class Screening(),
    as those could simply iterate through the arrays.
    
    Later more complex methods became necessary, so
    I created this class to group them.
    """
    
    def __init__(self, main, protein, mode, oi, log = True):
        """
        @main : ltp.Screening() instance
            One Screening() instance with MS1 and MS2 processing already done.
        
        @protein : str
            Protein name
        
        @mode : str
            MS mode (`pos` or `neg`)
        
        @oi : int
            Original index of one feature.
        
        @log : bool
            Whether output verbose messages to logfile.
        """
        self.main = main
        self.log = log
        self.protein = protein
        self.mode = mode
        self.oi = oi
        self.ifracs = self.main.fraction_indices(self.protein)
        self.fracsi = dict(map(lambda fr: (fr[1][0], fr[0]),
                               iteritems(self.ifracs)))
        self.tbl = self.main.valids[self.protein][self.mode]
        self.ms2 = self.tbl['ms2'][self.oi]
        self.i = self.main.oi2i(self.protein, self.mode, self.oi)
        self.fa = {}
        self.scans_fractions = map(
            lambda tpl: tuple(map(int, tpl)),
            uniqList(
                map(
                    tuple,
                    # scan ID, fraction ID
                    self.ms2[:,[12,14]]
                )
            )
        )
        self.classes = ['PA', 'PC', 'PE', 'PG', 'PS']
        self.classes2 = ['PA', 'PC', 'PE', 'PG', 'PS', 'PI', 'SM', 'BMP',
                         'Cer', 'Cer1P', 'HexCer', 'HexCerOH',
                         'DAG', 'TAG', 'FA', 'VA', 'LysoPE', 'LysoPC']
        self.identities = set([])
        self.identities2 = {}
        # get carbon counts from MS1
        self.ms1fa = self.tbl['ms1fa'][oi]
        # sorting by fractions/scans
        self.scans = dict(
            map(
                lambda sc_fr:
                    (
                        # scan ID, fraction ID: key
                        (sc_fr[0], sc_fr[1]),
                        # MS2 array slice: value
                        self.ms2[
                            np.where(
                                np.logical_and(
                                    self.ms2[:,12] == sc_fr[0],
                                    self.ms2[:,14] == sc_fr[1]
                                )
                            )
                        ]
                    ),
                self.scans_fractions
            )
        )
        # sorting by intensity desc
        self.scans = dict(
            map(
                lambda i:
                    (
                        i[0],
                        i[1][i[1][:,2].argsort()[::-1],:]
                    ),
                iteritems(self.scans)
            )
        )
        self.deltart = dict(
            map(
                lambda i:
                    (
                        i[0],
                        self.tbl['rtm'][self.i] - i[1][0,11]
                    ),
                iteritems(self.scans)
            )
        )
        self._scans = dict(
            map(
                lambda i:
                    (
                        i[0],
                        # i[0]: (scan ID, fraction ID)
                        # i[1]: MS2 array slice
                        MS2Scan(i[1], i[0], self)
                    ),
                iteritems(self.scans)
            )
        )
        self.maxins = dict(
            map(
                lambda i:
                    (
                        i[0],
                        i[1][0,2]
                    ),
                iteritems(self.scans)
            )
        )
        self.medins = dict(
            map(
                lambda i:
                    (
                        i[0],
                        np.median(i[1][:,2])
                    ),
                iteritems(self.scans)
            )
        )
        self.sort_scans()
        self.select_best_scan()
        self.msg('\n::: Analysing feature: %s :: %s :: index = %u ::'\
                ' m/z = %.03f :: number of MS2 scans: %u\n' % \
            (self.protein, self.mode, self.oi, self.tbl['mz'][self.i],
                len(self._scans))
        )
        self.msg('\n::: Database lookup resulted '\
            'the following species: %s\n' % self.print_db_species())
        self.msg('\n::: Intensities:\n%s%s\n' % \
            (' ' * 24, '          '.join(['A09', 'A10', 'A11', 'A12', 'B01'])))
        self.msg('%s%s' % (' ' * 16, '=' * 63))
        self.msg('\n    - absolute:  %s' % '   '.join(
          map(lambda x: '%10.01f' % x, self.tbl['fe'][self.i,:]))
        )
        self.msg('\n    - relative: %s\n' % \
          '  '.join(
                map(
                    lambda xx:
                        '%10.02f%%' % (xx * 100.0),
                    map(
                        lambda x:
                            x / np.nanmax(self.tbl['fe'][self.i,:]),
                        self.tbl['fe'][self.i,:]
                    )
                )
            )
        )
        self.msg('\n::: MS2 scans available (%u):\n\n' % len(self.scans))
        
        for sc in self._scans.values():
            sc.print_scan()
    
[docs]    def sort_scans(self):
        """
        Groups the scans in 3 groups: highest consists of those from the
        fractions with the highest protein level (there might be more than
        one the highest, because the fraction offset limits); the secondary
        contains scans from other protein containing fractions; while the
        other contains the scans from non protein containing fractions.
        Within the groups the scans are sorted from lowest to highest
        deltaRT.
        """
        
        self.highest = []
        self.secondary = []
        self.other = []
        with_protein = self.main.protein_containing_fractions(self.protein)
        for scan_num, fr in self.scans.keys():
            fr_name = 'a%u' % fr if fr != 13 and fr != 1 else 'b1'
            if fr_name in with_protein:
                if fr_name == self.main.fracs_orderL[self.protein][0][0] or \
                    fr_name == self.main.fracs_orderU[self.protein][0][0]:
                    self.highest.append((scan_num, fr))
                else:
                    self.secondary.append((scan_num, fr))
            else:
                self.other.append((scan_num, fr))
        self.highest = sorted(self.highest, key = lambda sc: abs(self._scans[sc].deltart))
        self.secondary = sorted(self.secondary, key = lambda sc: abs(self._scans[sc].deltart))
        self.other = sorted(self.other, key = lambda sc: abs(self._scans[sc].deltart))
    
[docs]    def select_best_scan(self):
        self.best_scan = \
            self.highest[0] if len(self.highest) else \
            self.secondary[0] if len(self.secondary) else \
            self.other[0] if len(self.other) else \
            None
    
[docs]    def print_db_species(self):
        return ', '.join(
            map(
                lambda hg:
                    '%s' % (
                        hg \
                            if hg not in self.tbl['ms1fa'][self.oi] \
                            or not len(self.tbl['ms1fa'][self.oi][hg]) \
                            else \
                        ', '.join(
                            map(
                                lambda fa:
                                    '%s(%s)' % (hg, fa),
                                self.tbl['ms1fa'][self.oi][hg]
                            )
                        )
                    ),
                self.tbl['ms1hg'][self.oi]
            )
        ) \
        if len(self.tbl['ms1hg'][self.oi]) \
        else 'none'
    
[docs]    def reload(self, children = False):
        modname = self.__class__.__module__
        mod = __import__(modname, fromlist=[modname.split('.')[0]])
        imp.reload(mod)
        new = getattr(mod, self.__class__.__name__)
        setattr(self, '__class__', new)
        
        if children:
            
            for sc in self._scans.values():
                
                sc.reload()
    
    def __str__(self):
        return ', '.join(
            map(
                lambda hgfas:
                    ', '.join(
                        map(
                            lambda fa:
                                '%s(%s)' % (hgfas[0], fa),
                            hgfas[1]
                        )
                    ),
                iteritems(self.fa)
            )
        )
    
[docs]    def get_header_div(self):
        return '\t\t<div class="ms2hdr">\n\t\t'\
            'MS2 scans of feature %.04f'\
            '\n\t\t\t|<span class="scansbutton morescans1"'\
                ' title="Show/hide scans from fractions with '\
                'highest protein concentration">scans+</span>\n'\
            '\n\t\t\t|<span class="scansbutton morescans2"'\
                ' title="Show/hide scans from other protein '\
                'containing fractions">scans++</span>\n'\
            '\n\t\t\t|<span class="scansbutton morescans3"'\
                ' title="Show/hide scans from non protein '\
                'containing fractions">scans+++</span>\n'\
            '\n\t\t\t|<span class="scansbutton morefrags"'\
                ' title="Show/hide fragments after 5 highest'\
                '">frags+</span>\n'\
            '\n\t\t\t|<span class="scansbutton remove"'\
                ' title="Remove scans of this feature'\
                '">remove</span>\n'\
            '\t\t</div>\n' % \
            self.tbl['mz'][self.i]
    
[docs]    def html_table(self):
        container = '\t<div id="%s" class="ms2tblcontainer">\n%s%s\n\t</div>'
        header = self.get_header_div()
        html = []
        if self.best_scan is not None:
            html.append(self._scans[self.best_scan].html_table())
        else:
            html.append('<div class="noscans">No scans '\
                'from fractions with highest protein concentration.</div>')
        for sc in sorted(self._scans.values(), key = lambda sc: abs(sc.deltart)):
            if sc.in_primary and sc.scan_id != self.best_scan:
                html.append(sc.html_table())
        for sc in sorted(self._scans.values(), key = lambda sc: abs(sc.deltart)):
            if not sc.in_primary and sc.scan_id != self.best_scan:
                html.append(sc.html_table())
        for sc in sorted(self._scans.values(), key = lambda sc: abs(sc.deltart)):
            if not sc.in_primary and not sc.in_secondary:
                html.append(sc.html_table())
        html = '\n'.join(html)
        return container % ('ms2c_%u_%u' % \
            (int(self.tbl['aaa'][self.i]), self.oi), header, html)
    
[docs]    def html_table_b64(self):
        return base64.encodestring(self.html_table()).replace('\n', '')
    
[docs]    def msg(self, text):
        if self.log:
            with open(self.main.ms2log, 'a') as f:
                f.write(text)
    
    def _any_scan(self, method, **kwargs):
        for i, sc in iteritems(self._scans):
            self.msg('\t\t:: Calling method %s() on scan #%u\n' % (method, i[0]))
            if getattr(sc, method)(**kwargs):
                return True
        return False
    
[docs]    def identify(self):
        for hg in self.classes:
            self.msg('\t>>> Attempting to identify %s in all scans\n' % (hg))
            if self._any_scan('is_%s' % hg.lower()):
                self.identities.add(hg)
                self.msg('\t<<< Result: identified as %s\n' % hg)
            else:
                self.msg('\t<<< Result: not %s\n' % hg)
    
[docs]    def identify2(self, num = 1):
        
        for scanid, scan in iteritems(self._scans):
            
            for hg in self.classes2:
                
                self.msg('\t>>> Attempting to identify %s in scan %u\n' %
                         (hg, scanid[0]))
                
                identified = False
                
                if hg not in self.identities2:
                    self.identities2[hg] = []
                
                method = '%s_%s_%u' % (hg.lower(), self.mode, num)
                
                if hasattr(scan, method):
                    
                    self.identities2[hg].append(getattr(scan, method)())
                    
                    identified = any(
                        map(
                            lambda i: i['score'] >= 5,
                            self.identities2[hg]
                        )
                    )
                
                if identified:
                    self.msg('\t<<< Result: identified as %s\n' % hg)
                else:
                    self.msg('\t<<< Result: not %s\n' % hg)
            
            if hasattr(scan, 'fa_co_2'):
                del scan.fa_co_2
            
            if hasattr(scan, 'fa_list'):
                scan.fa_list = None


[docs]class MS2Scan(object):
    """
    This class represents one MS2 scan and provides methods for its analysis.
    """
    
    def __init__(self, scan, scan_id, feature):
        
        self.scan = scan
        self.scan_id = scan_id
        self.feature = feature
        self.deltart = self.feature.deltart[self.scan_id]
        self.frac_id = self.scan_id[1]
        self.frac_name = self.feature.fracsi[self.frac_id]
        
        self.ms2_file = self.feature.main.ms2files\
            [self.feature.protein][self.feature.mode][self.frac_name]
        self.in_primary = self.frac_name in \
            self.feature.main.fracs_order[self.feature.protein]['prim']
        self.in_secondary = self.frac_name in \
            self.feature.main.fracs_order[self.feature.protein]['sec']
        self.i = self.feature.i
        self.tbl = self.feature.tbl
        self.insmax = self.scan[0,2]
        self.recc = re.compile(r'.*?([0-9]{1,2}):([0-9]).*')
        self.fa = {}
        self.fa1 = {}
        self._order = None
        self.sort_by_i()
        self.fa_list = None
        self.build_fa_list()
    
[docs]    def reload(self):
        modname = self.__class__.__module__
        mod = __import__(modname, fromlist=[modname.split('.')[0]])
        imp.reload(mod)
        new = getattr(mod, self.__class__.__name__)
        setattr(self, '__class__', new)
    
[docs]    def print_identities(self, fname = None):
        """
        Prints identities to standard output or file.
        """
        
        if fname is None:
            sys.stdout.write(self.identities_str())
        else:
            with open(fname, 'w') as fp:
                fp.write(self.identities_str())
    
[docs]    def identities_str(self, num = 1):
        """
        Returns table of all identification attempts as string.
        """
        
        result = ['=== Scan #%u (fraction %s) ===' % (
            self.scan_id[0], self.frac_name)]
        
        for hg in self.feature.classes2:
            
            method = '%s_%s_%u' % (
                hg.lower(), self.feature.mode, num
            )
            
            if not hasattr(self, method):
                continue
            
            idd = getattr(self, method)()
            
            result.append('%s\t%u\t%s' % (
                hg,
                idd['score'],
                ', '.join(idd['fattya'])
            ))
        
        return '%s\n' % '\n'.join(result)
    
[docs]    def print_scan(self):
        """
        Prints the list of fragments as an annotated table.
        """
        
        self.feature.msg(self.scan_str())
    
[docs]    def show(self):
        """
        Prints the scan table to standard output.
        """
        
        sys.stdout.write(self.scan_str())
    
[docs]    def scan_str(self):
        """
        Returns the scan table as string.
        """
        
        ms1mz = self.tbl['mz'][self.i]
        header = '\tFrag. m/z\tIntensity\tIdentity%sNL mass\n'\
            '\t%s\n' % (' ' * 26, '=' * 73)
        table = '\n\t'.join(
            map(
                lambda sc:
                    '%9.4f\t%10.2f\t%s%s%9.4f' % \
                        tuple(list(sc[[1, 2, 7]]) + \
                            [' ' * (32 - len(sc[7])), ms1mz - sc[1]]),
                self.scan
            )
        )
        
        fri = self.scan_id[1] - 9 if self.scan_id[1] != 1 else 4
        
        return (
            '\tScan %u (fraction %s%u; %s %s; '\
            'intensity = %.01f (%.02f%%)):\n\n%s\t%s\n\n' % \
            (self.scan_id[0],
             self.frac_name,
             self.frac_id,
             'contains' \
                if self.feature.ifracs[self.frac_name][1] \
                else 'does not contain',
             self.feature.protein,
             self.tbl['fe'][self.i, self.frac_id] \
                 if self.frac_id < self.tbl['fe'].shape[1] else np.nan,
             (self.tbl['fe'][self.i, self.frac_id] \
                 if self.frac_id < self.tbl['fe'].shape[1] else np.nan) / \
                 np.nanmax(self.tbl['fe'][self.i, :]) * 100.0,
             header,
             table)
        )
    
[docs]    def html_table(self):
        table = '\t\t<table id="%s" class="scantbl %s">\n%s\n\t\t</table>\n'
        th = '\t\t\t\t<th>\n\t\t\t\t\t%s\n\t\t\t\t</th>\n'
        ttl = '\t\t\t<tr class="%s">\n\t\t\t\t<th colspan="4">\n\t\t\t\t\t%s'\
            '\n\t\t\t\t</th>\n\t\t\t</tr>\n'
        tr = '\t\t\t<tr class="%s">\n%s\n\t\t\t</tr>\n'
        td = '\t\t\t\t<td>\n\t\t\t\t\t%s\n\t\t\t\t</td>\n'
        ms1mz = self.tbl['mz'][self.i]
        rows = ttl % (
            'scantitle',
            'Scan %u (%s, %s; '\
            'intensity = %.01f (%.02f%%); dRT = %.03f min)' % (
                self.scan_id[0],
                self.frac_name,
                'the highest fraction' if self.in_primary \
                    else 'not the highest, but contains %s' % \
                        self.feature.protein if self.in_secondary \
                    else 'does not contain %s' % \
                        self.feature.protein,
                self.tbl['fe'][self.i, self.frac_id] \
                    if fri < self.tbl['fe'].shape[1] else np.nan,
                (self.tbl['fe'][self.i, self.frac_id] \
                    if fri < self.tbl['fe'].shape[1] else np.nan) / \
                    np.nanmax(self.tbl['fe'][self.i, :]) * 100.0,
                self.deltart
            )
        )
        rows += tr % (
            'scanhdr',
            ''.join(
                map(
                    lambda cname:
                        th % cname,
                    ['Frag m/z', 'Intensity', 'Identity', 'NL mass']
                )
            )
        )
        for rn, row in enumerate(self.scan):
            rows += tr % (
                'fragrow %s' % ('first5' if rn < 5 else 'after5'),
                ''.join([
                    td % ('%.04f' % row[1]),
                    td % ('%.02f' % row[2]),
                    td % row[7],
                    td % ('%.04f' % (ms1mz - row[1]))
                ])
        )
        return table % ('%u_%u_%u' % (
            self.tbl['i'][self.i], self.scan_id[0], self.scan_id[1]),
            'best' if self.scan_id == self.feature.best_scan \
                else 'primary' if self.in_primary \
                else 'secondary' if self.in_secondary \
                else 'noprotein',
            rows
        )
    
[docs]    def get_by_rank(self, rank = 1, min_mz = 0.0):
        this_rank = 0
        return_next = False
        prev_mz = 0.0
        intensity = ''
        ids = []
        for r in self.scan:
            if r[1] < min_mz:
                continue
            if abs(r[1] - prev_mz) > 0.0001:
                prev_mz = r[1]
                this_rank += 1
            if this_rank == rank:
                return_next = True
                intensity = '%.04f(%u)' % (r[1], r[2])
                ids.append('%s (%.03f)' % (r[7], r[1]))
            elif this_rank != rank and return_next:
                return intensity, '; '.join(ids)
        return '', ''
    
[docs]    def full_list_str(self):
        result = []
        prev_mz = self.scan[0,1]
        intensity = self.scan[0,2]
        names = set([])
        for i, r in enumerate(self.scan):
            if abs(r[1] - prev_mz) > 0.0001:
                if len(names) == 1 and  list(names)[0] == 'unknown':
                    result.append('%s (%.03f) (%u)' % ('/'.join(sorted(list(names))), r[1], intensity))
                else:
                    result.append('%s (%u)' % ('/'.join(sorted(list(names))), intensity))
                names = set([])
                intensity = r[2]
                prev_mz = r[1]
            names.add(r[7])
        result.append('%s (%u)' % ('/'.join(sorted(list(names))), intensity))
        return '; '.join(result)
    
[docs]    def most_abundant_mz(self):
        result = self.scan[0,1]
        self.feature.msg('\t\t  -- Most abundant m/z is %.03f\n' % result)
        return result
    
[docs]    def mz_match(self, mz_detected, mz):
        return abs(mz_detected - mz) <= self.feature.main.ms2_tlr
    
[docs]    def sort_by_mz(self):
        """
        Sorts the scan array by m/z increasing.
        """
        self._order = self._order[self.scan[:,1].argsort()]
        self.scan = self.scan[self.scan[:,1].argsort(),:]
    
[docs]    def sort_by_i(self, return_order = False):
        """
        Sorts the scan array by intensity decreasing.
        """
        if self._order is None:
            order = self.scan[:,2].argsort()[::-1]
            self.scan = self.scan[order,:]
            self._order = np.array(xrange(self.scan.shape[0]), dtype = np.int)
        else:
            order = self._order.argsort()
            self.scan = self.scan[order,:]
            self._order = self._order[order]
        if return_order:
            return order
    
[docs]    def mz_lookup(self, mz):
        """
        Returns the index of the closest m/z value
        detected in the scan if it is within the
        range of tolerance, otherwise None.
        """
        du = 999.0
        dl = 999.0
        self.sort_by_mz()
        ui = self.scan[:,1].searchsorted(mz)
        if ui < self.scan.shape[0]:
            du = self.scan[ui,1] - mz
        if ui > 0:
            dl = mz - self.scan[ui - 1,1]
        i = ui if du < dl else ui - 1
        i = i if self.mz_match(self.scan[i,1], mz) else None
        sort = self.sort_by_i(return_order = True)
        if i is not None:
            i = np.where(sort == i)[0][0]
        return i
    
[docs]    def has_mz(self, mz):
        """
        Tells if an m/z exists in this scan.
        """
        
        result = self.mz_lookup(mz) is not None
        
        self.feature.msg('\t\t  -- m/z %.03f occures in this scan? -- %s\n' % \
            (mz, str(result)))
        
        return result
    
[docs]    def has_nl(self, nl):
        """
        Tells if a neutral loss exists in this scan.
        """
        
        result = self.has_mz(self.ms1_mz() - nl)
        
        self.feature.msg('\t\t  -- neutral loss of %.03f occures in '\
            'this scan? Looked up m/z %.03f - %.03f = %.03f -- %s\n' % \
            (nl, self.feature.tbl['mz'][self.feature.i], nl,
             self.feature.tbl['mz'][self.feature.i] - nl, str(result)))
        
        return result
    
[docs]    def ms1_mz(self):
        """
        Returns the MS1 m/z (which should be the precursor ion).
        """
        
        return self.feature.tbl['mz'][self.feature.i]
    
[docs]    def nl_lookup(self, nl):
        """
        Looks up if a neutral loss exists in this scan and returns its index.
        """
        
        return self.mz_lookup(self.feature.tbl['mz'][self.feature.i] - nl)
    
[docs]    def most_abundant_mz_is(self, mz):
        """
        Tells if the m/z with the highest intensity is `mz`.
        """
        
        result = self.mz_match(self.most_abundant_mz(), mz)
        self.feature.msg('\t\t  -- m/z %.03f is the most abundant? -- %s\n' % \
            (mz, str(result)))
        return result
    
[docs]    def mz_among_most_abundant(self, mz, n = 2):
        """
        Tells if an m/z is among the most aboundant `n` fragments
        in a spectrum.
        
        :param float mz: The m/z value.
        :param int n: The number of most abundant fragments considered.
        
        """
        
        result = False
        
        for i in xrange(min(n, self.scan.shape[0])):
            
            if self.mz_match(self.scan[i,1], mz):
                
                result = True
                break
        
        self.feature.msg('\t\t  -- m/z %.03f is among the %u most abundant? -- '\
            '%s\n' % (mz, n, str(result)))
        
        return result
    
[docs]    def nl_among_most_abundant(self, nl, n = 2):
        """
        Tells if a neutral loss corresponds to one of the
        most aboundant `n` fragments in a spectrum.
        
        :param float nl: The mass of the neutral loss.
        :param int n: The number of most abundant fragments considered.
        
        """
        
        result = False
        
        for i in xrange(min(n, self.scan.shape[0])):
            
            if self.mz_match(self.scan[i,1], self.ms1_mz() - nl):
                
                result = True
                break
        
        self.feature.msg('\t\t  -- neutral loss %.03f is among '\
            'the %u most abundant? -- '\
            '%s\n' % (nl, n, str(result)))
        
        return result
    
[docs]    def get_intensity(self, mz):
        """
        Returns the intensity of a fragment ion from its m/z.
        Value is `None` if m/z does not present.
        """
        
        i = self.mz_lookup(mz)
        
        if i is not None:
            
            return self.intensities[i,2]
        
        return None
    
[docs]    def get_nl_intensity(self, nl):
        """
        Returns the intensity of a fragment ion from its a neutral loss.
        Value is `None` if neutral loss does not present.
        """
        
        return self.get_intensity(self.ms1_mz() - nl)
    
[docs]    def mz_percent_of_most_abundant(self, mz, percent = 80.0):
        """
        Tells if an m/z has at least certain percent of intensity
        compared to the most intensive fragment.
        
        :param float mz: The m/z value.
        :param float percent: The threshold in percent
                              of the highest intensity.
        
        """
        
        insmax = self.scan[0,2]
        result = False
        
        for frag in self.scan:
            
            if self.mz_match(frag[1], mz):
                
                result = True
                break
            
            if frag[2] < insmax * 100.0 / percent:
                result = False
                break
        
        self.feature.msg('\t\t  -- m/z %.03f has abundance at least %.01f %% of'\
            ' the highest abundance? -- %s\n' % \
            (mz, percent, str(result)))
        
        return result
    
[docs]    def fa_type_is(self, i, fa_type, sphingo = False, uns = None,
                   scan_index = True):
        """
        Tells if a fatty acid fragment is a specified type. The type
        should be a part of the string representation of the fragment,
        e.g. `-O]` for fragments with one oxygen loss.
        """
        
        ifa = None
        
        if not scan_index:
            ifa = i
            i   = self.fa_list[ifa][5]
        
        result = (
            (fa_type in self.scan[i,8] or fa_type in self.scan[i,7]) and
            (not sphingo or 'Sphingosine' in self.scan[i,7]) and
            (uns is None or ifa is None or self.fa_list[ifa][0][1] <= uns)
        )
        
        self.feature.msg('\t\t  -- Fragment #%u (%s, %s): fatty acid type '\
            'is %s?  -- %s\n' % \
                (i, self.scan[i,7], self.scan[i,8], fa_type, str(result)))
        
        return result
    
[docs]    def is_fa(self, i, sphingo = False):
        """
        Examines whether a fragment is fatty acid-like or not.
        In the labels of fatty acid fragments we always 
        """
        
        result = 'FA' in self.scan[i,7] or 'Lyso' in self.scan[i,7] or \
            (sphingo and 'Sphi' in self.scan[i,7])
        
        self.feature.msg('\t\t  -- Fragment #%u (%s): is fatty acid? '\
            '-- %s\n' % (i, self.scan[i,7], str(result)))
        
        return result
    
[docs]    def most_abundant_fa(self, fa_type, head = 1, sphingo = False):
        """
        Returns `True` if there is a fatty acid among the most abundant
        fragments and it is of the defined type; `False` if there is no
        fatty acid, or it is different type.
        
        :param str fa_type: The type of the fatty acid fragment ion.
        :param int head: The number of most abundant fragments considered.
        :param bool sphingo: Look for a sphingolipid backbone.
        """
        
        result = False
        
        for i in xrange(self.scan.shape[0]):
            
            if i == head:
                
                break
            
            if self.is_fa(i, sphingo = sphingo):
                
                result = self.fa_type_is(i, fa_type, sphingo = sphingo)
        
        self.feature.msg('\t\t  -- Having fatty acid %s among %u most abundant '\
            'features? -- %s\n' % (fa_type, head, str(result)))
        
        return result
    
[docs]    def get_most_abundant_fa(self, fa_type = None, head = 1, sphingo = False):
        """
        Looks up the most abundant fatty acid fragment of the given type.
        Returns tuple with mz, intensity, carbon count and unsaturation, index.
        """
        
        self.build_fa_list()
        
        for fa_frag in self.fa_list[:head]:
            
            if (
                fa_type is None  or
                self.fa_type_is(fa_frag[5], fa_type, sphingo)
            ):
                
                return self.scan[fa_frag[5],1], fa_frag[4], fa_frag[0], fa_frag[5]
        
        return None, None, None, None
    
[docs]    def fa_cc_among_most_abundant(self, cc, hg, n = 2, sphingo = False):
        """
        Returns `True` if there is one  fatty acid with the defined
        carbon count and unsaturation and compatible with the given
        headgroup among the most abundant `n` fragments.
        """
        
        self.build_fa_list()
        
        for fa_frag in self.fa_list:
            
            if fa_frag[5] >= n:
                
                break
            
            if (
                fa_frag[0] == cc and
                (fa_frag[1] is None or hg in fa_frag[1]) and
                (sphingo or fa_frag[3])
            ):
                
                return True
        
        return False
    
[docs]    def fa_among_most_abundant(self, fa_type, n = 2,
                               min_mass = None, sphingo = False,
                               uns = None):
        """
        Returns `True` if there is one of the defined type of fatty acid
        fragments among the given number of most abundant fragments, and
        it has a mass greater than the given threhold.
        """
        
        self.build_fa_list()
        result = False
        
        for i, fa in enumerate(self.fa_list):
            
            if not sphingo or fa[3] and (
                    min_mass is None or
                    self.scan[fa[5],1] >= min_mass
                ):
                
                if min_mass is not None:
                    
                    self.feature.msg('\t\t\t-- Fragment #%u having mass larger '\
                        'than %.01f\n' % (i, min_mass))
                
                if self.fa_type_is(i, fa_type, sphingo = sphingo,
                                   uns = uns, scan_index = False):
                    result = True
            
            if i == n:
                break
            
            elif min_mass is not None:
                self.feature.msg('\t\t\t-- Fragment #%u having mass lower '\
                        'than %.01f\n' % (i, min_mass))
        
        self.feature.msg('\t\t  -- Having fatty acid fragment %s among %u most '\
            'abundant -- %s\n' % (fa_type, n, str(result)))
        
        return result
    
[docs]    def fa_percent_of_most_abundant(self, fa_type, percent = 80.0, sphingo = False):
        for i in xrange(self.scan.shape[0]):
            if self.is_fa(i, sphingo = sphingo):
                if self.fa_type_is(i, fa_type, sphingo = sphingo):
                    return True
            if self.scan[i,2] < self.insmax * 100.0 / percent:
                return False
        return False
    
[docs]    def mz_most_abundant_fold(self, mz, fold):
        """
        Tells if an m/z is the most abundant fragment
        and it has at least a certain
        fold higher intensity than any other fragment.
        
        :param float mz: The m/z value.
        :param float fold: The m/z must be this times higher than any other.
        """
        
        result = False
        if self.most_abundant_mz_is(mz):
            result = self.scan.shape[0] == 1 or \
                self.scan[1,2] * fold <= self.scan[0,2]
        self.feature.msg('\t\t  -- m/z %.03f is at least %u times higher than '\
            'any other? -- %s\n' % (mz, fold, str(result)))
        return result
    
[docs]    def sum_cc_is(self, cc1, cc2, cc):
        """
        Returns `True` if the sum of the 2 carbon counts and
        unsaturations is equal with the third one.
        
        :param tuple cc1: Carbon count and unsaturation 1.
        :param tuple cc2: Carbon count and unsaturation 2.
        :param str cc: Expected total carbon count and unsaturation.
        """
        
        return self.cc2str(self.sum_cc([cc1, cc2])) == cc
    
[docs]    def cer_fa_test(self, frag1, frag2):
        return \
            self.fa_type_is(frag1[5], 'CerFA(') and \
            self.fa_type_is(frag2[5], 'CerSphi-N(') and \
            frag1[4] > frag2[4] * 2
    
[docs]    def fa_combinations3(self, hg, head = None, expected_intensities = None):
        """
        Finds all combinations of 3 fatty acids which match the
        total carbon count and unsaturation resulted by database
        lookups of the MS1 precursor mass.
        This can be used for example at TAG.
        
        :param str hg: The short name of the headgroup, e.g. `TAG`.
        :param int head: If `None` or `numpy.inf` all fragment ions
                         will be considered, otherwise only the first
                         most aboundant until the number `head`.
        """
        
        result = set([])
        
        if hg in self.feature.ms1fa and len(self.feature.ms1fa[hg]):
            ccs = list(self.feature.ms1fa[hg])
        else:
            return result
        
        head = np.inf if head is None else head
        
        for cc in ccs:
            
            try:
                icc = self.cc2int(cc)
            except AttributeError:
                continue
            
            for frag0 in self.fa_list:
                
                if frag0[5] >= head:
                    break
                
                cc0 = frag0[0]
                
                cc12e = '%u:%u' % tuple(map(lambda x: x[0] - x[1],
                                            zip(*[icc, cc0])))
                
                cc12s = self.fa_combinations_tuples(cc12e, head = head, by_cc = True)
                
                for cc12 in cc12s:
                    
                    cc012 = '/'.join(sorted(list(cc12[0]) + [self.cc2str(cc0)]))
                    
                    if self.sum_cc_str(cc012) == icc:
                    
                        if self.intensity_ratios([
                            (cc12[1][0], cc12[2][0]),
                            (cc12[1][1], cc12[2][1]),
                            (frag0[4], frag0[5])],
                            expected = expected_intensities
                        ):
                            
                            result.add(cc012)
        
        return result
    
[docs]    def intensity_ratios(self, intensities, expected = None, logbase = 1.5):
        """
        Tells if the ratio of a list of intensities fits
        the one in `expected` or is even if `expected` is `None`.
        
        :param list intensities: List of tuples, first element is the
                                 intensity, the second is an uniqe
                                 identifier of the fragments.
        :param list expected: List with expected intensity proportions.
                              E.g. `[1, 1, 2]` means the third ion is
                              twice higher intense than the 2 others.
        :param int logbase: The fold difference tolerance when comparing
                            intensities. E.g. if this is 2, then an almost
                            twice less or more intense ion will considered
                            to have similar intensity.
        
        """
        
        if len(intensities) == 1:
            
            return True
        
        i = intensities
        
        if any(map(lambda ii: ii[0] <= 0.0, i)):
            
            return False
        
        # to know if one fragment contributes more than one times;
        # intensities divided by the times the fragment is incident
        cntr = collections.Counter(map(lambda ii: ii[1], i))
        
        # by default expecting more or less equal intensities
        if expected is None:
            
            expected = [1.0] * len(i)
        
        i = list(
            map(
                lambda ii:
                    (ii[1][0] / (expected[ii[0]] * cntr[ii[1][1]]), ii[1][1]),
                enumerate(i)
            )
        )
        
        return (
            all(
                map(
                    lambda co:
                        (
                            (math.log(co[0][0], logbase) -
                            math.log(co[1][0], logbase)) <= 1
                        ),
                    itertools.combinations(i, 2)
                )
            )
        )
    
[docs]    def fa_combinations_old(self, hg, sphingo = False,
                            head = None, by_cc = False):
        """
        Finds all combinations of 2 fatty acids which match the
        total carbon count and unsaturation resulted by database
        lookups of the MS1 precursor mass.
        Alternatively a carbon count and unsaturation can be provided
        if `by_cc` is set to `True`.
        
        :param str hg: Short name of the headgroup, e.g. `PC`; or cc:unsat e.g.
                       `32:1` if `by_cc` is `True`.
        :param bool sphingo: Assume sphingolipid.
        :param int head: If `None` the total fragment list used, if a number,
                         only the most intensive fragments accordingly.
        :param bool by_cc: Use the MS1 database identification to find out
                           the possible carbon counts and unsaturations for
                           the given headgroup, or a cc:uns provided and
                           search combinations accordingly.
        
        """
        
        result = set([])
        if hg in self.feature.ms1fa and len(self.feature.ms1fa[hg]):
            ccs = list(self.feature.ms1fa[hg])
        elif by_cc:
            ccs = [hg]
        else:
            return result
        
        head = np.inf if head is None else head
        
        self.build_fa_list()
        
        for cc in ccs:
            
            for frag1 in self.fa_list:
                
                for frag2 in self.fa_list:
                    
                    result.update(
                        self.get_fa_combinations(frag1, frag2, hg,
                                                cc, sphingo, head)
                    )
        
        return result
    
[docs]    def fa_combinations_preprocess(self, regenerate = False):
        """
        Generates a lookup table for all possible combinations of two
        fatty acids.
        """
        
        if not hasattr(self, 'fa_co_2') or regenerate:
            
            self.fa_co_2 = {}
            l = self.fa_list
            
            for i, j in itertools.combinations_with_replacement(
                xrange(len(self.fa_list)), 2):
                
                key = self.sum_cc([(l[i][0][0], l[i][0][1]),
                                   (l[j][0][0], l[j][0][1])])
                
                if key not in self.fa_co_2:
                    self.fa_co_2[key] = set([])
                
                self.fa_co_2[key].add((i, j))
    
[docs]    def fa_combinations(self, hg, sphingo = False,
                               head = None, by_cc = False):
        """
        Finds all combinations of 2 fatty acids which match the
        total carbon count and unsaturation resulted by database
        lookups of the MS1 precursor mass.
        Alternatively a carbon count and unsaturation can be provided
        if `by_cc` is set to `True`.
        
        This method does the same as `fa_combinations` but works with
        a preprocessed lookup table.
        
        Returns set of strings.
        
        :param str hg: Short name of the headgroup, e.g. `PC`; or cc:unsat e.g.
                       `32:1` if `by_cc` is `True`.
        :param bool sphingo: Assume sphingolipid.
        :param int head: If `None` the total fragment list used, if a number,
                         only the most intensive fragments accordingly.
        :param bool by_cc: Use the MS1 database identification to find out
                           the possible carbon counts and unsaturations for
                           the given headgroup, or a cc:uns provided and
                           search combinations accordingly.
        
        """
        
        return set(map(lambda co: '%s/%s' % co[0],
                       self.fa_combinations_tuples(hg, sphingo, head, by_cc)))
    
    
[docs]    def fa_combinations_tuples(self, hg, sphingo = False,
                               head = None, by_cc = False):
        """
        Finds all combinations of 2 fatty acids which match the
        total carbon count and unsaturation resulted by database
        lookups of the MS1 precursor mass.
        Alternatively a carbon count and unsaturation can be provided
        if `by_cc` is set to `True`.
        
        Returns tuples of tuples with carbon count/unsaturation,
        intensities and indices.
        
        :param str hg: Short name of the headgroup, e.g. `PC`; or cc:unsat e.g.
                       `32:1` if `by_cc` is `True`.
        :param bool sphingo: Assume sphingolipid.
        :param int head: If `None` the total fragment list used, if a number,
                         only the most intensive fragments accordingly.
        :param bool by_cc: Use the MS1 database identification to find out
                           the possible carbon counts and unsaturations for
                           the given headgroup, or a cc:uns provided and
                           search combinations accordingly.
        
        """
        
        result = []
        if hg in self.feature.ms1fa and len(self.feature.ms1fa[hg]):
            ccs = list(self.feature.ms1fa[hg])
        elif by_cc:
            ccs = [hg]
        else:
            return result
        
        head = np.inf if head is None else head
        
        self.build_fa_list()
        self.fa_combinations_preprocess()
        
        for cc in ccs:
            
            icc = self.cc2int(cc)
            
            if icc in self.fa_co_2:
                
                for i, j in self.fa_co_2[icc]:
                    
                    frag1 = self.fa_list[i]
                    frag2 = self.fa_list[j]
                    
                    result.extend(
                        self.get_fa_combinations(frag1, frag2, hg,
                                                 cc, sphingo, head)
                    )
        
        return result
    
[docs]    def get_fa_combinations(self, frag1, frag2, hg, cc, sphingo, head):
        """
        Processes two fatty acid fragments to decide
        if their combination is valid.
        """
        
        result = []
        
        if frag1[5] >= head or frag2[5] >= head:
            return result
        
        if hg == 'Cer' and not self.cer_fa_test(frag1, frag2):
            # where not the 'CerFA' is the most intensive
            # those are clearly false
            return result
        
        if frag1[0][0] is not None and frag2[0][0] is not None and \
            (frag1[1] is None or hg in frag1[1]) and \
            (frag2[1] is None or hg in frag2[1]) and \
            (not sphingo or frag1[3] or frag2[3]):
            if self.sum_cc_is(frag1[0], frag2[0], cc):
                ether_1 = 'O-' if frag1[2] else ''
                ether_2 = 'O-' if frag2[2] else ''
                fa_1 = '%s%u:%u' % (ether_1, frag1[0][0], frag1[0][1])
                fa_2 = '%s%u:%u' % (ether_2, frag2[0][0], frag2[0][1])
                if frag1[3]:
                    fa_1 = 'd%s' % fa_1
                elif frag2[3]:
                    sph = 'd%s' % fa_2
                    fa_2 = fa_1
                    fa_1 = sph
                if not frag1[3] and not frag2[3]:
                    fa = tuple(sorted([fa_1, fa_2]))
                else:
                    fa = (fa_1, fa_2)
                
                result.append((
                    fa,
                    (frag1[4], frag2[4]),
                    (frag1[5], frag2[5])
                ))
        
        return result
    
[docs]    def matching_fa_frags_of_type(self, hg, typ, sphingo = False,
        return_details = False):
        """
        Returns carbon counts of those fragments which are of the given type
        and have complement fatty acid fragment of any type.
        
        Details is a dict with carbon counts as keys
        and fragment names as values.
        """
        result = set([])
        details = {}
        
        if hg in self.feature.ms1fa and len(self.feature.ms1fa[hg]):
            
            for cc in self.feature.ms1fa[hg]:
                
                self.build_fa_list()
                
                for frag1 in self.fa_list:
                    
                    for frag2 in self.fa_list:
                        
                        if frag1[0][0] is not None and \
                            frag2[0][0] is not None and \
                            (frag1[1] is None or hg in frag1[1]) and \
                            (frag2[1] is None or hg in frag2[1]) and \
                            (not sphingo or frag1[3]):
                            
                            if self.fa_type_is(frag1[5], typ) and \
                                self.sum_cc_is(frag1[0], frag2[0], cc):
                                
                                result.add(frag1[0])
                                
                                if return_details:
                                    
                                    if frag1[0] not in details:
                                        
                                        details[frag1[0]] = set([])
                                    
                                    details[frag1[0]].add(self.scan[frag2[5],7])
        
        if return_details:
            return (result, details)
        else:
            return result
    
[docs]    def cer_missing_fa(self, cer_hg):
        """
        Infers the fatty acid carbon count and unsaturation
        by subtracting the sphingoid backbone from the total.
        This works with Cer, CerP and HexCer.
        """
        
        result = set([])
        
        cer_ccs = set([])
        
        for frag in self.scan[:5]:
            
            if 'phingo' in frag[7]:
                
                cer_ccs.add(self.get_cc(frag[7]))
        
        if cer_hg in self.feature.ms1fa:
            
            for cc in self.feature.ms1fa[cer_hg]:
                
                cc = self.get_cc(cc)
                
                for cer_cc in cer_ccs:
                    
                    carb = cc[0] - cer_cc[0]
                    unsat = cc[1] - cer_cc[1]
                    
                    result.add('d%u:%u/%u:%u' % (
                        cer_cc[0], cer_cc[1], carb, unsat))
        
        return result
    
[docs]    def cer_matching_fa(self, cer_fa):
        score = 0
        if 'Cer' in self.feature.ms1fa:
            cer_cc = self.get_cc(cer_fa)
            for cc in self.feature.ms1fa['Cer']:
                cc = self.get_cc(cc)
                carb = cc[0] - cer_cc[0]
                unsat = cc[1] - cer_cc[1] + 2
                if self.frag_name_present(
                    '[FA-alkyl(C%u:%u)-H]-' % (carb, unsat)):
                    score += 1
                carb = cc[0] - cer_cc[0] - 2
                unsat = cc[1] - cer_cc[1] + 1
                if self.frag_name_present(
                    '[FA-alkyl(C%u:%u)-H]-' % (carb, unsat)):
                    score += 1
        return score
    
[docs]    def build_fa_list(self, rebuild = False):
        """
        Returns list with elements:
            carbon count, headgroups (set or None),
            esther (False) or ether (True),
            sphingosine (True) or fatty acid (False),
            fragment intensity and row index
        """
        if self.fa_list is None or rebuild:
            self.fa_list = []
            for i, frag in enumerate(self.scan):
                if frag[7] != 'unknown' and self.is_fa(i, sphingo = True):
                    cc = self.get_cc(frag[7])
                    hgs = self.get_hg(frag[7])
                    is_ether = 'alk' in frag[7]
                    is_sphingo = 'Sphi' in frag[7]
                    self.fa_list.append([cc, hgs, is_ether, is_sphingo, frag[2], i])
    
[docs]    def get_hg(self, frag_name):
        hgfrags = self.feature.main.nHgfrags \
            if self.feature.mode == 'neg' \
            else self.feature.main.pHgfrags
        return hgfrags[frag_name] \
            if frag_name in hgfrags and \
                len(hgfrags[frag_name]) \
            else None
    
[docs]    def get_cc(self, fa):
        """
        Extracts carbon count from any string, for example fatty acid names.
        Recognizes the pattern [number:number].
        E.g. from `Phosphatidylcholine (36:1)` returns the tuple `(36, 1)`.
        To convert pure cc:uns strings, use the method `cc2int` instead,
        as that one is faster.
        
        :param str fa: Any string containing carbon count and unsaturation.
        
        """
        
        m = self.recc.match(fa)
        
        if m is not None:
            return tuple(map(int, m.groups()))
        
        return (None, None)
    
[docs]    def most_abundant_fa_cc(self, fa_type = None, head = 2):
        fa_cc = []
        for i, frag in enumerate(self.scan):
            if i == head:
                break
            if self.is_fa(i) and (
                    fa_type is None or
                    self.fa_type_is(i, fa_type)
                ):
                
                cc = self.get_cc(frag[7])
                if cc[0] is not None:
                    fa_cc.append((cc, frag[2]))
        
        return fa_cc
    
[docs]    def cc2str(self, cc):
        """
        Converts carbon count and unsaturation from tuple of integers
        to string. E.g. `(18, 1)` results `18:1`.
        
        :param tuple cc: Tuple of 2 integers representing carbon count
                         and unsaturation.
        """
        
        return '%u:%u' % cc
    
[docs]    def ccs2str(self, ccs):
        """
        Converts multiple carbon counts and unsaturations from tuples
        of integers format to string. E.g. `[(18, 1), (18, 0)]` results
        `18:1/18:0`.
        
        :param list ccs: List of tuples of integers.
        """
        
        return '/'.join(map(self.cc2str, sorted(ccs)))
    
[docs]    def cc2int(self, cc):
        """
        Converts carbon count and unsaturation from string format to
        tuple of integers. E.g. `18:1` results `(18, 1)`.
        
        :param str cc: String representing carbon count and unsaturation
                       separated by colon.
        """
        
        return tuple(map(int, cc.split(':')))
    
[docs]    def ccs2int(self, ccs):
        """
        Converts a string of multiple carbon counts and unsaturations
        to a list of tuples of integers.
        
        :param str ccs: Multiple carbon counts and unsaturations in string
                        representation, e.g. `18:1/16:0`.
        
        """
        
        return list(map(self.cc2int, ccs.split('/')))
    
[docs]    def sum_cc_str(self, ccs):
        """
        Returns the sum of multiple carbon counts and unsaturations.
        Accepts string format and results string format.
        
        :param str ccs: Multiple carbon counts and unsaturations in string
                        representation, e.g. `18:1/16:0`.
        """
        
        return self.sum_cc(self.ccs2int(ccs))
    
[docs]    def sum_cc(self, ccs):
        """
        Adds numeric carbon counts and unsaturations.
        Accepts a list of tuples of integers, returns
        a tuple of integers.
        
        :param list ccs: A list with tuples of integers,
                         e.g. `[(14, 1), (16, 0)]`.
        """
        
        return (
            tuple(
                reduce(
                    lambda cu1, cu2:
                        # here `cu`: carbon count and unsaturation
                        (cu1[0] + cu2[0], cu1[1] + cu2[1]),
                    ccs
                )
            )
        )
    
[docs]    def sum_cc2(self, ccs):
        """
        Returns the total carbon count and unsaturation in tuple
        format from a list of tuples where the first element of
        the tuple is another tuple with the cc and uns, and the
        second is the intensity, which is discarded here.
        
        :param list ccs: List of the format described above. E.g.
                         `[((16, 0), 1000.0), ((18, 1), 722)]`, this
                         results `(34, 1)`.
        """
        
        return self.sum_cc(map(lambda cci: cci[0], ccs))
    
[docs]    def sum_cc2str(self, ccs):
        """
        Returns the total carbon count and unsaturation in string
        format from a list of tuples where the first element of
        the tuple is another tuple with the cc and uns, and the
        second is the intensity, which is discarded here.
        
        :param list ccs: List of the format described above. E.g.
                         `[((16, 0), 1000.0), ((18, 1), 722)]`, this
                         results `34:1`.
        """
        
        return self.cc2str(self.sum_cc2(ccs))
    
[docs]    def add_fa1(self, fa, hg):
        if hg not in self.fa1:
            self.fa1[hg] = set([])
            self.fa1[hg].add(
                tuple(
                    map(
                        lambda fai:
                            fai[0],
                        fa
                    )
                )
            )
            fastr = ', '.join(
                    map(
                        lambda fai:
                            self.cc2str(fai[0]),
                        fa
                    )
                )
            self.feature.msg('\t\t  -- Adding fatty acids %s at headgroup '\
                '%s\n' % (fastr, hg))
    
[docs]    def fa_ccs_agree_ms1(self, hg, fa_type = None, head = 2):
        fa_cc = self.most_abundant_fa_cc(fa_type = fa_type, head = head)
        if len(fa_cc) > 0:
            cc = self.sum_cc2str([fa_cc[0]] * 2)
            agr = self.fa_cc_agrees_ms1(cc, hg)
            if agr:
                self.add_fa1(fa_cc[:1], hg)
            if len(fa_cc) > 1:
                cc = self.sum_cc2str(fa_cc[:2])
                agr = self.fa_cc_agrees_ms1(cc, hg)
                if agr:
                    self.add_fa1(fa_cc[:2], hg)
        return hg in self.fa
    
[docs]    def fa_cc_agrees_ms1(self, cc, hg):
        result = False
        if hg in self.feature.ms1fa and cc in self.feature.ms1fa[hg]:
            if hg not in self.feature.fa:
                self.feature.fa[hg] = set([])
            if hg not in self.fa:
                self.fa[hg] = set([])
            self.feature.fa[hg].add(cc)
            self.fa[hg].add(cc)
            result = True
        self.feature.msg('\t\t  -- Carbon count from MS2: %s; from databases '\
            'lookup: %s -- Any of these matches: %s\n' % \
                (
                    cc,
                    str(self.feature.ms1fa[hg]) \
                        if hg in self.feature.ms1fa else '-',
                    str(result))
                )
        return result
    
[docs]    def frag_name_present(self, name):
        
        return name in self.scan[:,7]
    
    #### New methods
    
[docs]    def cer1p_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Ceramide-1-phosphate.

        **Specimen:**
        
        - GLTPD1 - 616.47
      
        **Principle:**
        
        - The most abundant fragment is 78.9591 metaphosphate.
      - If 96.9696 phosphate present adds to the score.
        
        """
        
        score = 0
        fattya = set([])
        if self.most_abundant_mz_is(78.95905658):
            score += 5
            if self.has_mz(96.96962158):
                score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def hexcer_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Hexosyl-Ceramide.

        **Specimen:**
        
        - GLTP - 744.5627
      
        **Principle:**
        
        - Hexose fragments 71.0115, 89.0220 and 101.0219 must present.
      
        """
        
        score = 0
        fattya = set([])
        
        if all(map(lambda mz: self.mz_among_most_abundant(mz, n = 10),
                   # these are 3 fragments found at GLTP
                   [71.0115000, 89.0220000, 101.021900])):
            
            score += 5
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def hexceroh_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Hexosyl-Ceramide-OH
        ('t'). This method is the same as `hexcer_neg_1`.

        **Specimen:**
        
        - GLTP - 760.557
      
        **Principle:**
        
        - Hexose fragments 71.0115, 89.0220 and 101.0219 must present.
      
        """
        
        return self.hexcer_neg_1()
    
[docs]    def hexcer_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Hexosyl-Ceramide.

        **Specimen:**
        
        - GLTP + 810.68
      
        **Principle:**
        
        - Hexose fragments 198.0740, 180.0634 and 162.0528 must present.
      
        """
        
        score = 0
        fattya = set([])
        
        hexfrags = sum(map(lambda nl: self.nl_among_most_abundant(nl, n = 15),
                           [198.073955, 180.06339, 162.052825]))
        
        if hexfrags:
            score += hexfrags + 4
        
        if score:
            
            fattya.update(self.cer_missing_fa('HexCer'))
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def hexceroh_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Hexosyl-Ceramide-OH
        (`t`). This method is the same as `hexcer_pos_1`.

        **Specimen:**
        
        - GLTP + 826.67
      
        **Principle:**
        
        - Hexose fragments 198.0740, 180.0634 and 162.0528 must present.
      
        """
        
        return self.hexcer_pos_1()
    
[docs]    def cer1p_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Ceramide-1-phosphate.

        **Specimen:**
        
        - GLTPD1 + 728.59
      
        **Principle:**
        
        - A shpingosine backbone with 2 H2O loss must be among the 3 highest
        intensity fragments.
        - Presence of any of the following fragments increases the score:
        82.0651, 107.0729, 135.1043, 149.1199.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.fa_among_most_abundant('-H2O-H2O+]+', n = 3, sphingo = True):
            score += 4
            
            if any(map(self.has_mz,
                       # these present at Cer too
                       # a specific difference needed!
                       [82.0651257, 107.072951, 135.104251, 149.119901])):
                score += 1
            
            fattya.update(self.cer_missing_fa('Cer1P'))
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def dag_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a DAG.

        **Specimen:**
        
        - SEC14L2 + 584.52
      - in vitro: BNIP2 + 770.67
        
        **Principle:**
        
        - Combination of fatty acid fragments among the 10 most abundant
        fragments must match the expected carbon count and unsaturation.
        - If these are among the 5 highest fragments the score is higher.
      
        """
        
        score = 0
        fattya = set([])
        
        if(self.fa_combinations('DAG', head = 10)):
            score += 4
            
            if(self.fa_combinations('DAG', head = 6)):
                
                score += 2
            
            fattya.update(self.fa_combinations('DAG'))
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def dag_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a DAG.

        **Specimen:**
        
        - We don't have yet.
      
        **Principle:**
        
        - Combination of fatty acid fragments among the 10 most abundant
        fragments must match the expected carbon count and unsaturation.
        - If these are among the 5 highest fragments the score is higher.
      
        """
        
        score = 0
        fattya = set([])
        
        if(self.fa_combinations('DAG', head = 10)):
            score += 4
            
            if(self.fa_combinations('DAG', head = 6)):
                
                score += 2
            
            fattya.update(self.fa_combinations('DAG'))
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def tag_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a TAG.

        **Specimen:**
        
        - We don't have yet.
      
        **Principle:**
        
        - Combination of fatty acid fragments must match the
        expected carbon count and unsaturation.
        
        """
        
        score = 0
        fattya = set([])
        
        fattya.update(self.fa_combinations3('TAG'))
        
        if fattya:
            score += 5
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def tag_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a TAG.

        **Specimen:**
        
        - STARD11 + 818.7187
      
        **Principle:**
        
        - Combination of fatty acid fragments must match the expected
        carbon count and unsaturation.
        
        """
        
        score = 0
        fattya = set([])
        
        fattya.update(self.fa_combinations3('TAG'))
        
        if fattya:
            score += 5
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def pi_pos_1(self):
        """
        Examines if a negative MS2 spectrum is Phosphatidylinositol.

        **Specimen:**
        
        - SEC14L2 + 906.60 and 882.6
      
        **Principle:**
        
        - Combinations of fatty acid fragments must match the expected
        carbon count and unsaturation for PI.
        - Presence of neutral losses 259.0219 and 277.0563 adds to the score.
      
        """
        
        score = 0
        fattya = set([])
        
        fattya.update(self.fa_combinations('PI'))
        if fattya:
            score += 1
            if self.has_nl(259.021894):
                score += 4
            if self.has_nl(277.056272):
                score += 4
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def ps_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Phosphatidylserine.

        **Specimen:**
        
        - BPI + 790.56
      
        **Principle:**
        
        - PS headgroup neutral loss 185.0089 must be the highest intensity.
      
        """
        
        score = 0
        fattya = set([])
        
        if self.nl_among_most_abundant(185.008927, 1):
            
            score += 5
            
            fattya.update(self.fa_combinations('PS'))
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def bmp_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum
        is a Bismonoacylglycerophosphate.

        **Specimen:**
        
        - BPIFB2 + 792.57
      
        **Principle:**
        
        - A glycerol+fatty acid fragment can be found among the 3 highest?
      - The PG headgroup neutral loss (189.0402) is among the fragments?
        - If so, does it have a lower intensity than half of the fatty
        acid+glycerol fragment?
        
        """
        
        score = 0
        fattya = set([])
        
        if self.fa_among_most_abundant('+G(', 3):
            fattya.update(self.fa_combinations('BMP'))
            if fattya:
                score += 4
            
            hg_int = self.get_nl_intensity(189.0402)
            
            if hg_int:
                
                gfa_highest = self.get_most_abundant_fa('+G(', head = 4)
                
                if gfa_highest[1] < hg_int * 2:
                    
                    score = 0
                    fattya = set([])
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def pg_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum
        is a Phosphatidylglycerol.
        At in vivo observed only in standard.
        
        **Principle:**
        
        - The PG headgroup neutral loss (189.0402) is the fragment ion
        with the highest intensity?
        
        """
        
        score = 0
        fattya = set([])
        
        if self.nl_among_most_abundant(189.0402, 1):
            
            score += 5
            
            fattya.update(self.fa_combinations('PG'))
            if fattya:
                score += 4
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def va_pos_1(self):
        """
        Examines if a positive MS2 spectrum is vitamin A (retinol).

        **Specimen:**
        
        - RBP1 + 269.2245
      - RBP4 + 269.2245
        
        **Principle:**
        
        - The most abundant ion is the whole molecule m/z = 269.224.
      - Presence off 3 other ions adds to the score but not
          mandatory: 213.165, 145.1027, 157.1028.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.mz_among_most_abundant(269.224, 3):
            score += 5
            score += sum(map(self.has_mz, [213.165, 145.1027, 157.1028]))
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def bmp_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is Phosphatidylglycerol.
        The result will be the same as `bmp_neg_1`, as in negative
        mode we do not know a way to distinguish these species.
        

        **Specimen:**
        
        - GM2A - 799.54
      - BPIFB2 - 773.5258 (might be BMP)
        
        **Principle:**
        
        - The most abundant fragment is a fatty acid [M-H]- ion.
      - The 152.9958 glycerophosphate fragment must be present.
        - If Lyso-PG fragment present with carbon count complementing
        the [M-H]- fatty acid score is higher.
        - Presence of 171.0064 headgroup fragment adds to the score.
      
        """
        
        return self.pg_neg_1()
    
    #### End: new methods
    
[docs]    def pe_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is Phosphatidylethanolamine.

        **Specimen:**
        
        - GM2A - 714.507
      
        **Principle:**
        
        - The most abundant fragment is a fatty acid [M-H]- ion.
      - 140.0118 PE headgroup must be present.
        - Other headgroup ions 196.0380 and 178.0275 add to the score.
      - Lyso-PE and [M-H-CO2]- fatty acid fragments complementing the
          highest [M-H]- fatty acid increase the score.
        
        """
        
        score = 0
        fattya = set([])
        
        if (
            self.is_fa(0) and
            self.fa_type_is(0, '-H]-') and
            self.has_mz(140.0118206) and
            not self.lysope_neg_1()['score']
        ):
            
            score += 5
            fattya = self.fa_combinations('PE')
            
            if self.has_mz(196.0380330):
                score += 1
            
            if self.has_mz(178.0274684):
                score += 1
            
            fa_h_ccs = self.matching_fa_frags_of_type('PE', '-H]-')
            
            for fa_h_cc in fa_h_ccs:
                
                for fa_other in [
                    '[Lyso-PE(C%u:%u)-]-',
                    '[Lyso-PE-alkyl(C%u:%u)-H2O]-',
                    '[Lyso-PE-alkyl(C%u:%u)-]-',
                    '[FA(C%u:%u)-H-CO2]-'
                ]:
                    
                    if self.frag_name_present(fa_other % fa_h_cc):
                        score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def lysope_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is
        Lysophosphatidylethanolamine.

        **Specimen:** 
        
        - in vitro FABP1 - 464.27
      
        **Principle:**
        
        - The most abundant fragment is a fatty acid [M-H]- ion.
      - 140.0118 PE headgroup must be present.
        - The carbon count and unsaturation of the highest fatty acid
        fragment must be the same as it is expected for the whole PE molecule.
        - Other headgroup ions 196.0380 and 178.0275 add to the score.
      
        """
        
        score = 0
        fattya = set([])
        
        if (
            self.is_fa(0) and
            self.fa_type_is(0, '-H]-') and
            self.has_mz(140.0118206)
        ):
            score += 5
            
            if self.has_mz(196.0380330):
                score +=1
            
            if self.has_mz(178.0274684):
                score += 1
            
            ccs = self.ms1_cc(['PE', 'LysoPE'])
            
            for cc in ccs:
                
                if len(self.fa_list) and self.fa_list[0][0] == self.cc2int(cc):
                    
                    score += 3
                    fattya.add(cc)
            
            if not fattya:
                score = 0
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def pc_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Phosphatidylcholine.

        **Specimen:**
        
        - BPI - 804.57
      
        **Principle:**
        
        - 168.0431 phosphate+choline-CH3 fragment must be present.
      - The highest abundant fragment must be a fatty acid [M-H]- fragment.
        - Lyso-PC fragments complementing the highest [M-H]- fatty acid
        increase the score.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.is_fa(0) and self.fa_type_is(0, '-H]-') and self.has_mz(168.0431206):
            
            score += 5
            fattya = self.fa_combinations('PC')
            fa_h_ccs = self.matching_fa_frags_of_type('PC', '-H]-')
        
            for fa_h_cc in fa_h_ccs:
                
                if self.frag_name_present('[Lyso-PC(c%u:%u)-]-' % fa_h_cc):
                    score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def pi_neg_1(self):
        """
        Examines if a negative MS2 spectrum is Phosphatidylinositol.

        **Specimen:**
        
        - GM2A - 835.52
      
        **Principle:**
        
        - Inositolphosphate-H2O fragment 241.0119, metaphosphate 78.9591 and
        headgroup fragment 152.9958 must be present.
        - Additional headgroup fragments 96.9696, 259.0224 and 297.0381
        increase the score.
        - Presence of Lyso-PI fragments complementing other [M-H]- fatty
        acid fragments increase the score.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.has_mz(241.0118779) and self.has_mz(152.9958366) and \
            self.has_mz(78.95905658):
            
            score += 5
            fattya = self.fa_combinations('PI')
            for hgfrag_mz in [96.96962158, 259.0224425, 297.0380926]:
                if self.has_mz(hgfrag_mz):
                    score += 1
            fa_h_ccs = self.matching_fa_frags_of_type('PI', '-H]-')
            for fa_h_cc in fa_h_ccs:
                for fa_other in [
                    '[Lyso-PI(C%u:%u)-]-',
                    '[Lyso-PI(C%u:%u)-H2O]-]']:
                    if self.frag_name_present(fa_other % fa_h_cc):
                        score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def ps_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Phosphatidylserine.

        **Specimen:**
        
        - ORP9 - 788.54
      
        **Principle:**
        
        - The most abundant fragment is an [M-H]- fatty acid fragment.
      - Glycerophosphate fragment 152.9958 must be present.
        - Metaphosphate 78.9591 increases the score.
      - Serine-H2O neutral loss 87.0320 adds to the score.
        - Presence of Lyso-PS and Lyso-PA fragments complementing
        the highest [M-H]- fatty acid fragment increase the score.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.is_fa(0) and self.fa_type_is(0, '-H]-') and \
            self.mz_among_most_abundant(152.9958366, 5):
            
            score += 5
            fattya = self.fa_combinations('PS')
            
            if not fattya:
                score = 0
                return {'score': score, 'fattya': fattya}
            
            if self.has_mz(87.03202840):
                score += 1
            
            if self.has_mz(78.95905658):
                score += 1
            
            fa_h_ccs = self.matching_fa_frags_of_type('PS', '-H]-')
            
            for fa_h_cc in fa_h_ccs:
                
                for fa_other in [
                    '[Lyso-PS(C%u:%u)-]-',
                    '[Lyso-PA(C%u:%u)-]-']:
                    
                    if self.frag_name_present(fa_other % fa_h_cc):
                        score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def pg_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is Phosphatidylglycerol.
        The result will be the same as `bmp_neg_1`, as in negative
        mode we do not know a way to distinguish these species.
        

        **Specimen:**
        
        - GM2A - 799.54
      - BPIFB2 - 773.5258 (might be BMP)
        
        **Principle:**
        
        - The most abundant fragment is a fatty acid [M-H]- ion.
      - The 152.9958 glycerophosphate fragment must be present.
        - If Lyso-PG fragment present with carbon count complementing
        the [M-H]- fatty acid score is higher.
        - Presence of 171.0064 headgroup fragment adds to the score.
      
        """
        
        score = 0
        fattya = set([])
        
        if self.is_fa(0) and self.fa_type_is(0, '-H]-') and \
            self.has_mz(152.9958366):
            
            score += 5
            
            #if self.mz_among_most_abundant(152.9958366, 5):
            #   score -= 3
            
            fattya = self.fa_combinations('PG')
            
            if self.has_mz(171.0064016):
                score += 1
            
            fa_h_ccs = self.matching_fa_frags_of_type('PG', '-H]-')
            
            for fa_h_cc in fa_h_ccs:
                
                for fa_other in [
                    'Lyso-PG(C%u:%u)-]-',
                    'Lyso-PG(C%u:%u)-H2O]-']:
                    
                    if self.frag_name_present(fa_other % fa_h_cc):
                        score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def sm_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Sphingomyeline.

        **Specimen:**
        
        - GLTPD1 - 745.55
      
        **Principle:**
        
        - Must have a neutral loss of CH3+COOH (60.0211).
      - Phosphate+choline-CH3 fragment 168.0431 must be present.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.mz_among_most_abundant(168.0431206) and self.has_nl(60.02113):
            score += 5
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def sph1p_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Spingosine-1-phosphate.

        **Specimen:**
        
        - Only observed in standard.
      
        **Principle:**
        
        - Phosphate 78.9590 must be present.
      
        """
        
        score = 0
        fattya = set([])
        
        if self.has_mz(78.95905658):
            score += 5
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def cer_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is Ceramide.

        **Specimen:**
        
        - SEC14L1 - 582.509
      
        **Principle:**
        
        - A Ceramide backbone fragment must be among the 2 most abundant.
      - Ceramide backbone fragments lighter by N or C2N but same carbon
          count and unsaturation add to the score.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.fa_among_most_abundant('CerFA', n = 2):
            
            score += 5
            fattya = self.fa_combinations('Cer', sphingo = True)
            fa_h_ccs = self.matching_fa_frags_of_type('Cer', 'CerFA(')
            
            for fa_h_cc in fa_h_ccs:
                
                for fa_other in [
                    '[CerFA-N(C%u:%u)-]-',
                    '[CerFA-C2N(C%u:%u)-]-']:
                    
                    if self.frag_name_present(fa_other % fa_h_cc):
                        
                        score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def cerp_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a Ceramide-1-phosphate.
        Gives similar result as Sphingosine-1-phosphate.

        **Specimen:**
        
        - GLTPD1 - 616.47
      
        **Principle:**
        
        - The most abundant fragment must be 78.9591 metaphosphate.
      - Presence of 96.9696 phosphate increase the score.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.most_abundant_mz_is(78.95905658):
            
            score += 5
            
            if self.has_mz(96.96962158):
                
                score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def pc_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Phosphatidylcholine.

        **Specimen:**
        
        - BPI + 786.607
      
        **Principle:**
        
        - The most abundant fragment must be choline+phosphate 184.0733.
      - The 86.0964 ethyl-triethylammonium must be present.
        - The most abundant fatty acid can not have the same carbon count
        and unsaturation as the whole molecule (then it is Lyso-PC).
        - Fragments 104.1069, 124.9998, 60.0808 and 58.0651 increase the
        score.
        
        """
        
        score = 0
        fattya = set([])
        
        if (
            self.most_abundant_mz_is(184.073323) and
            self.has_mz(86.096425) and
            not self.lysopc_pos_1()['score']
        ):
            
            score += 5
            fattya = self.fa_combinations('PC')
            
            if self.has_mz(104.106990):
                score += 1
            if self.has_mz(124.999822):
                score += 1
            if self.has_mz(60.080776):
                score +=1
            if self.has_mz(58.065126):
                score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def lysopc_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Lysophosphatidylcholine.
        
        **Specimen:**
        
        - in vitro FABP1 + 522.36
      
        **Principle:**
        
        - Choline-phosphate 184.0733, ethyl-triethylammonium 86.0964 and
        neutral loss 183.0660 must be present.
        - The latter neutral loss corresponds to a fatty acid+glycerol ion.
      - The carbon count and unsaturation of this fragment should match
          that of the whole molecule.
        
        """
        
        score = 0
        fattya = set([])
        
        if (
            self.most_abundant_mz_is(184.073323) and
            self.has_mz(86.096425) and
            self.has_nl(183.066045)
        ):
            
            score += 5
            
            fa_mz = self.scan[self.nl_lookup(183.066045),1]
            
            ccs = self.ms1_cc(['PC', 'LysoPC'])
            
            for cc in ccs:
                
                for fa_frag in self.fa_list:
                    
                    if (
                        fa_frag[0] == self.cc2int(cc) and
                        abs(self.scan[fa_frag[5],1] - fa_mz) < 0.0001 and
                        'FA+G(' in self.scan[fa_frag[5],7] and
                        self.cc2int(cc)[0] < 21
                    ):
                        
                        score += 5
                        fattya.add(cc)
            
            if not fattya:
                score = 0
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def sm_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Sphingomyeline.
        
        **Specimen:**
        
        - GLTPD1 + 703.57
      - GLTPD1 + 813.68 (in vitro)
        
        **Principle:**
        
        - The following choline fragments must be present: 60.0808, 86.0964,
        104.1069, 124.9998 and 184.0733. The last one is the most intensive.
        - If 58.0651 can be found it adds to the score.
      
        """
        
        score = 0
        fattya = set([])
        
        if all(
            map(
                lambda mz:
                    self.has_mz(mz),
                [
                    60.080776,
                    86.096425,  #
                    104.106990, #
                    124.999822, #
                    184.073323  #
                ]
            )
        ):
            
            score += 5
            
            if self.has_mz(58.0651):
                
                score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def fa_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a fatty acid.
        Here we only check if the most abundant fragment is the
        fatty acid itself.

        **Specimen:**
        
        - in vitro FABP1 +
      
        **Principle:**
        
        - The most abundant fragment must be a fatty acid which matches
        the carbon count and the unsaturation of the whole molecule.
        
        """
        score = 0
        fattya = set([])
        
        self.build_fa_list()
        
        if self.is_fa(0):
            
            if 'FA' in self.feature.ms1fa:
                
                for cc in self.feature.ms1fa['FA']:
                    
                    if len(self.fa_list) and self.cc2int(cc) == self.fa_list[0][0]:
                        
                        score += 5
                        fattya.add(cc)
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def fa_neg_1(self):
        """
        Examines if a negative mode MS2 spectrum is a fatty acid.
        Here we only check if the most abundant fragment is the
        fatty acid itself.

        **Specimen:**
        
        - in vitro FABP1 -
      
        **Principle:**
        
        - The most abundant fragment must be a fatty acid which matches
        the carbon count and the unsaturation of the whole molecule.
        
        """
        
        # these are the same
        return self.fa_pos_1()
    
[docs]    def cerp_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Ceramide-1-phosphate.

        **Specimen:**
        
        - GLTPD1 + 728.59, 590.45, 702.58, 618.430, 616.415, 640.409
      
        **Principle:**
        
        - A sphingosine fragment with double H2O loss must be among the three
        highest abundant fragments.
        
        """
        
        score = 0
        fattya = set([])
        
        if self.fa_among_most_abundant('-H2O-H2O+]+', n = 3, sphingo = True):
            score += 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def pe_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a
        Phosphatidylethanolamine.

        **Specimen:**
        
        - BPI + 718.536
      
        **Principle:**
        
        - The PE headgroup neutral loss 141.0191 has the highest intensity.
      - If it is a Lyso-PE score will be zero.
        
        """
        
        score = 0
        fattya = set([])
        if self.nl_among_most_abundant(141.019097, 1):
            score += 5
            fattya = self.fa_combinations('PE')
            if not fattya and self.lysope_pos_1()['score']:
                score = 0
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def lysope_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a
        Lysophosphatidylethanolamine.

        **Specimen:**
        
        - in vitro FABP1 + 454.29
      
        **Principle:**
        
        - The PE headgroup neutral loss 141.0191 has the highest intensity.
      - A fatty acid-glycerol fragment should match the carbon count and
          unsaturation of the whole molecule.
        
        """
        
        score  = 0
        fattya = set([])
        
        if self.nl_among_most_abundant(141.019097, 2):
            
            score += 6
            
            if len(self.fa_list):
            
                frag1 = self.fa_list[0]
            
                ccs = self.ms1_cc(['PE', 'LysoPE'])
                
                for cc in ccs:
                    
                    if (frag1[0] == self.cc2int(cc) and
                        self.fa_type_is(frag1[5], 'FA+G(')):
                        
                        score += 1
                        fattya.add(cc)
                        
                    
                    else:
                        score -= 1
            
            else:
                score -= 1
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def cer_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a Ceramide.

        **Specimen:**
        
        - SEC14L1 + 538.52
      - STARD11 + 538.526
        
        **Principle:**
        
        - A sphingosine backbone with two H2O loss must be among the
        10 most abundant fragments.
        - Fatty acid [M+H]+ or [M-O]+ fragments or neutral losses
        complementing the one above increase the score.
        - Sphingosine backbone fragments with same carbon count and
        unsaturation with the one with 2 water loss but [Sph-C-2(H2O)]+
          or [Sph-H2O]+ add to the score.
        - The score increases if the following choline fragments
        can not be found: 58.0651, 104.1070, 124.9998 and 184.0733.
        - The presence of the following fragments increase the score:
        60.0444, 70.0651, 82.0651, 96.0808, 107.0730, 121.0886,
          135.1042 and 149.1199.
        
        """
        
        score = 0
        fattya = set([])
        
        if 'Cer' not in self.feature.ms1fa:
            ms1uns = None
            
        else:
            # larger unsaturation than the whole molecule
            # does not make sense
            ms1uns = max(map(lambda _cc: self.cc2int(_cc)[1],
                             self.feature.ms1fa['Cer']))
        
        if self.fa_among_most_abundant('-H2O-H2O+]+', n = 10,
                                       sphingo = True, uns = ms1uns):
            
            score += 5
            fattya = self.fa_combinations('Cer', sphingo = True)
            
            sph_ccs, fa_frags = self.matching_fa_frags_of_type('Cer',
                '-H2O-H2O+]+', sphingo = True, return_details = True)
            
            for cc, fa_frag_names in iteritems(fa_frags):
                
                for fa_frag_name in fa_frag_names:
                    
                    if '+H]+' in fa_frag_name:
                        score += 1
                    if '-O]+' in fa_frag_name:
                        score += 1
                    if 'NL' in fa_frag_name:
                        score += 1
            
            for sph_cc in sph_ccs:
                for fa_other in [
                    '[Sphingosine(C%u:%u)-C-H2O-H2O+]+',
                    '[Sphingosine(C%u:%u)-H2O+]+']:
                    if self.frag_name_present(fa_other % sph_cc):
                        
                        score += 1
            
            if not len(
                list(
                    filter(
                        lambda mz:
                            self.has_mz(mz),
                        [58.065126, 104.106990, 124.999822, 184.073323]
                    )
                )
            ):
                
                score += 1
            
            score += len(
                list(
                    filter(
                        lambda mz:
                            self.has_mz(mz),
                        [60.0443902, 70.0651257, 82.0651257, 96.0807757,
                        107.072951, 121.088601, 135.104251, 149.119901]
                    )
                )
            )
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def vd_pos_1(self):
        """
        Examines if a positive mode MS2 spectrum is a vitamin D.
        This method is not implemented, does nothing.
        """
        
        score = 0
        fattya = set([])
        
        return {'score': score, 'fattya': fattya}
    
[docs]    def ms1_cc(self, hgs):
        """
        For a list of headgroups returns the possible carbon counts
        based on database lookups of MS1 m/z's.
        Returns set of strings.
        """
        
        ccs = set([])
        
        for hg in hgs:
            
            if hg in self.feature.ms1fa:
                ccs.update(self.feature.ms1fa[hg])
            
            if hg in self.feature.ms1fa:
                ccs.update(self.feature.ms1fa[hg])
        
        return ccs
    
[docs]    def is_pe(self):
        if self.feature.mode == 'pos':
            return self.pa_pe_ps_pg_pos('PE')
        else:
            return self.pe_pc_pg_neg('PE')
    
[docs]    def is_pc(self):
        if self.feature.mode == 'pos':
            return self.pc_pos('PC')
        else:
            return self.pe_pc_pg_neg('PC')
    
[docs]    def is_pa(self):
        if self.feature.mode == 'pos':
            return self.pa_pe_ps_pg_pos('PA')
        else:
            return self.pa_ps_neg('PA')
    
[docs]    def is_ps(self):
        if self.feature.mode == 'pos':
            return self.pa_pe_ps_pg_pos('PS')
        else:
            return self.pa_ps_neg('PS')
    
[docs]    def is_pg(self):
        if self.feature.mode == 'pos':
            return self.pa_pe_ps_pg_pos('PG')
        else:
            return self.pe_pc_pg_neg('PG')
    
[docs]    def pa_pe_ps_pg_pos(self, hg):
        return self.mz_among_most_abundant(141.0191) \
            and self.fa_among_most_abundant('-O]+', min_mass = 140.0) \
            and self.fa_ccs_agree_ms1(hg, '-O]+')
    
[docs]    def pa_ps_neg(self, hg):
        return self.has_mz(152.9958366) and self.has_mz(78.95905658) \
            and self.most_abundant_fa('-H]-') \
            and self.fa_ccs_agree_ms1(hg, '-H]-')
    
[docs]    def pe_pc_pg_neg(self, hg):
        return self.most_abundant_fa('-H]-') \
            and self.fa_ccs_agree_ms1(hg, '-H]-')
    
[docs]    def pc_pos(self, hg):
        return self.mz_most_abundant_fold(184.0733, 3) \
            and self.fa_ccs_agree_ms1(hg, head = 4)