#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `lipyd` python module
#
# Copyright (c) 2015-2018 - EMBL
#
# File author(s): Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GNU GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# http://www.gnu.org/licenses/gpl-3.0.html
#
# Website: http://www.ebi.ac.uk/~denes
#
from future.utils import iteritems
import bs4
import re
import warnings
import imp
import sys
import copy
from collections import defaultdict
import lipyd._curl as _curl
#: URL for atomic masses
urlMasses = 'http://www.ciaaw.org/atomic-masses.htm'
#: URL for atomic weights
urlWeights = 'http://www.ciaaw.org/atomic-weights.htm'
#: URL for isotopic abundances
urlAbundances = 'http://www.ciaaw.org/isotopic-abundances.htm'
#: Mass of a proton
proton = 1.00727646677
#: Mass of an electron
electron = 0.00054857990924
#: Mass of a neutron
neutron = 1.00866491588
#: Mass of a proton
p = proton
#: Mass of an electron
e = electron
#: Mass of a neutron
n = neutron
reNonDigit = re.compile(r'[^\d.]+')
reform = re.compile(r'([A-Za-z][a-z]*)([0-9]*)')
replmi = re.compile(r'([-+])')
refloat = re.compile(r'[0-9\.]+')
[docs]def getMasses(url):
"""
Downloads an HTML table from CIAAW webpage
and extracts the atomic mass or weight information.
"""
c = _curl.Curl(url, silent = False)
reqMasses = c.result
with warnings.catch_warnings():
# there is a deprecated call in lxml
warnings.simplefilter('ignore', DeprecationWarning)
soupMasses = bs4.BeautifulSoup(reqMasses, 'lxml')
mass = {}
symbol = None
a = None
for tr in soupMasses.find_all('tr'):
tr = [td for td in tr.find_all('td')]
if not len(tr):
continue
elif len(tr) == 5:
symbol = tr[1].text.strip()
mass[symbol] = {}
a = int(reNonDigit.sub('', tr[-2].text.strip()))
m = [float(reNonDigit.sub('', i)) for i in tr[-1].text.split(',')]
m = sum(m) / len(m)
mass[symbol][a] = m
mass['proton'] = 1.00727646677
mass['electron'] = 0.00054857990924
mass['neutron'] = 1.00866491588
return mass
[docs]def getMassMonoIso():
"""
Obtains monoisotopic masses from CIAAW webpage.
Stores the result in `massMonoIso` module level variable.
"""
globals()['massMonoIso'] = getMasses(urlMasses)
[docs]def getMassFirstIso():
"""
Obtains the masses of the most abundant isotope for each element.
The result stored in the :py:attr:`.massFirstIso` module attribute.
"""
if 'massMonoIso' not in globals():
getMassMonoIso()
if 'freqIso' not in globals():
getFreqIso()
firstIso = {}
for symbol, isos in iteritems(massMonoIso):
if symbol in freqIso:
try:
firstIso[symbol] = \
isos[max(freqIso[symbol].items(), key = lambda i: i[1])[0]]
except:
continue
firstIso['proton'] = proton
firstIso['electron'] = electron
firstIso['neutron'] = neutron
globals()['massFirstIso'] = firstIso
globals()['massdb'] = firstIso
[docs]def getWeightStd():
"""
Obtains atomic waights from CIAAW webpage.
Stores the result in :py:attr:`.weightStd` attribute of the module.
"""
globals()['weightStd'] = getMasses(urlWeights)
[docs]def getFreqIso():
"""
Obtains isotope abundances from CIAAW webpage.
Stores the result in :py:attr:`.freqIso` attribute of the module.
"""
c = _curl.Curl(urlAbundances, silent = False)
reqAbundances = c.result.split('\n')
# fixing erroneous HTML from CIAAW:
for i, l in enumerate(reqAbundances[:-1]):
l = l.strip()
# print('..%s.. ..%s..' % (l[-5:], reqAbundances[i + 1][:3]))
if l[-5:] == '</tr>' and reqAbundances[i + 1][:3] == '<td':
# print('ermfeoirm')
reqAbundances[i + 1] = '<tr>%s' % reqAbundances[i + 1]
with warnings.catch_warnings():
# there is a deprecated call in lxml
warnings.simplefilter('ignore', DeprecationWarning)
soupAbundances = bs4.BeautifulSoup('\n'.join(reqAbundances), 'lxml')
freqIso = {}
symbol = None
a = None
for tr in soupAbundances.find_all('tr'):
tr = [td for td in tr.find_all('td')]
if len(tr) == 6:
symbol = tr[1].text.strip()
freqIso[symbol] = {}
ai = -3 if len(tr) == 6 else -2
try:
a = int(tr[ai].text.strip())
p = [float(reNonDigit.sub('', i)) for i in tr[ai + 1].text.split(',')]
p = sum(p) / len(p)
freqIso[symbol][a] = p
except (ValueError, IndexError, KeyError):
continue
globals()['freqIso'] = freqIso
weight_builtin = {
"proton": 1.00727646677,
"electron": 0.00054857990924,
"neutron": 1.00866491588,
"H": 1.007825,
"He": 4.002602,
"Li": 6.941,
"Be": 9.012182,
"B": 10.811,
"C": 12.0107,
"N": 14.00674,
"O": 15.9994,
"F": 18.9984032,
"Ne": 20.1797,
"Na": 22.989768,
"Mg": 24.3050,
"Al": 26.981539,
"Si": 28.0855,
"P": 30.973762,
"S": 32.066,
"Cl": 35.4527,
"Ar": 39.948,
"K": 39.0983,
"Ca": 40.078,
"Sc": 44.955910,
"Ti": 47.88,
"V": 50.9415,
"Cr": 51.9961,
"Mn": 54.93805,
"Fe": 55.847,
"Co": 58.93320,
"Ni": 58.6934,
"Cu": 63.546,
"Zn": 65.39,
"Ga": 69.723,
"Ge": 72.61,
"As": 74.92159,
"Se": 78.96,
"Br": 79.904,
"Kr": 83.80,
"Rb": 85.4678,
"Sr": 87.62,
"Y": 88.90585,
"Zr": 91.224,
"Nb": 92.90638,
"Mo": 95.94,
"Tc": 98.0,
"Ru": 101.07,
"Rh": 102.90550,
"Pd": 106.42,
"Ag": 107.8682,
"Cd": 112.411,
"In": 114.82,
"Sn": 118.710,
"Sb": 121.757,
"Te": 127.60,
"I": 126.90447,
"Xe": 131.29,
"Cs": 132.90543,
"Ba": 137.327,
"La": 138.9055,
"Ce": 140.115,
"Pr": 140.90765,
"Nd": 144.24,
"Pm": 145.0,
"Sm": 150.36,
"Eu": 151.965,
"Gd": 157.25,
"Tb": 158.92534,
"Dy": 162.50,
"Ho": 164.93032,
"Er": 167.26,
"Tm": 168.93421,
"Yb": 173.04,
"Lu": 174.967,
"Hf": 178.49,
"Ta": 180.9479,
"W": 183.85,
"Re": 186.207,
"Os": 190.2,
"Ir": 192.22,
"Pt": 195.08,
"Au": 196.96654,
"Hg": 200.59,
"Tl": 204.3833,
"Pb": 207.2,
"Bi": 208.98037,
"Po": 209,
"At": 210,
"Rn": 222,
"Fr": 223,
"Ra": 226.0254,
"Ac": 227,
"Th": 232.0381,
"Pa": 213.0359,
"U": 238.0289,
"Np": 237.0482,
"Pu": 244,
"Am": 243,
"Cm": 247,
"Bk": 247,
"Cf": 251,
"Es": 252,
"Fm": 257,
"Md": 258,
"No": 259,
"Lr": 260,
"Rf": 261,
"Db": 262,
"Sg": 263,
"Bh": 262,
"Hs": 265,
"Mt": 266,
}
isotopes = {
"H2": 2.01410178,
"H3": 3.0160492,
"C13": 13.003355,
"N15": 15.000109,
"O17": 16.999132,
"O18": 17.999160,
"S33": 32.971458,
"S34": 33.967867,
"S35": 35.967081
}
iso_freq = {
"H2": 0.000115,
"H3": 0.0,
"C13": 0.0107,
"N15": 0.0068,
"O17": 0.00038,
"O18": 0.00205,
"S33": 0.0076,
"S34": 0.0429,
"S35": 0.0002
}
[docs]class MassBase(object):
def __init__(
self,
formula_mass = None,
charge = 0,
isotope = 0,
**kwargs
):
"""
This class is very similar to `Formula`. Actually it can be
initialized either with providing a formula or a mass or
even element counts as keyword arguments.
The key difference compared to `Formula` is that it behaves
as a `float` i.e. indeed represents a molecular mass, while
`Formula` behaves as a chemical formula i.e. representing
the counts of elements. If you add two `MassBase` instances
(or a float) you get a `float` while if you add two
`Formula` instances (or a string) you get a new `Formula`.
Finally `Mass` is able to provide both behaviours but
adding two `Mass` instances will result a new `Mass`.
Args
----
:param str,float,NoneType formula_mass: Either a string
expressing a chemical formula (e.g. H2O) or a molecular
mass (e.g. 237.1567) or `None` if you provide the
formula as keyword arguments.
**kwargs: elements & counts, e.g. c = 6, h = 12, o = 6...
Thanks for
https://github.com/bsimas/molecular-weight/blob/master/chemweight.py
"""
if 'massFirstIso' not in globals():
getMassFirstIso()
self.exmass = massFirstIso
self.charge = charge
self.isotope = isotope
if formula_mass is None:
self.formula_from_dict(kwargs)
elif hasattr(formula_mass, 'lower'):
self.formula = formula_mass
elif isinstance(formula_mass, MassBase):
if hasattr(formula_mass, 'mass'):
self.mass = formula_mass.mass
if hasattr(formula_mass, 'formula'):
self.formula = formula_mass.formula
self.mass_calculated = formula_mass.mass_calculated
else:
self.formula = None
if type(formula_mass) is float:
self.mass = formula_mass
self.calc_mass()
def __neg__(self):
return -1 * self.mass
def __add__(self, other):
return float(other) + self.mass
def __radd__(self, other):
return self.__add__(other)
def __iadd__(self, other):
self.mass += float(other)
def __sub__(self, other):
return self.mass - float(other)
def __rsub__(self, other):
return float(other) - self.mass
def __isub__(self, other):
self.mass += float(other)
def __truediv__(self, other):
return self.mass / float(other)
def __rtruediv__(self, other):
return float(other) / self.mass
def __itruediv__(self, other):
self.mass /= float(other)
def __mul__(self, other):
return self.mass * float(other)
def __rmul__(self, other):
return self.__mul__(other)
def __imul__(self, other):
self.mass *= float(other)
def __float__(self):
return self.mass
def __eq__(self, other):
return abs(self.mass - float(other)) <= 0.01
[docs] def calc_mass(self):
if self.has_formula():
if self.formula == '':
self.mass = 0.0
self.mass_calculated = True
else:
atoms = (
reform.findall(self.formula)
if not hasattr(self, 'atoms')
else iteritems(self.atoms)
)
m = 0.0
for element, count in atoms:
count = int(count or '1')
m += self.exmass[element] * count
m -= self.charge * massdb['electron']
m += self.isotope * massdb['neutron']
self.mass = m
self.mass_calculated = self.has_mass()
else:
self.mass_calculated = False
[docs] def has_mass(self):
return self.mass > 0.0 or (self.formula == '' and self.mass == 0.0)
[docs] def reload(self):
modname = self.__class__.__module__
mod = __import__(modname, fromlist=[modname.split('.')[0]])
imp.reload(mod)
new = getattr(mod, self.__class__.__name__)
setattr(self, '__class__', new)
parts = {
'water': 'H2O',
'twowater': 'H4O2',
'carboxyl': 'COOH',
'aldehyde': 'CHO',
}
for name, form in parts.items():
setattr(sys.modules[__name__], name, MassBase(form))
[docs]def first_isotope_mass(elem):
return massFirstIso[elem] if elem in massFirstIso else None
[docs]def get_mass(elem):
"""
Returns exact mass of the highest abundant isotope of an element.
"""
return first_isotope_mass(elem)
[docs]def isotope_mass(elem, iso):
return (
massMonoIso[elem][iso]
if elem in massMonoIso and iso in massMonoIso[elem] else
None
)
[docs]def get_weight(elem):
return weight_builtin[elem] if elem in weight_builtin else None
[docs]def calculate(formula):
"""
Evaluates a string as formula.
Args
----
:param str formula:
Expression as a string e.g. ``HCH3CHOHCOOH - water + electron``.
Returns
-------
Mass as float.
"""
result = None
op = '__add__'
for step in replmi.split(formula):
if step == '-':
op = '__sub__'
continue
if step == '+':
op = '__add__'
continue
step = step.strip()
if refloat.match(step):
step = float(step)
if (
step in globals() and
isinstance(globals()[step], (float, int, MassBase))
):
step = globals()[step]
if op is not None:
result = getattr(MassBase(result), op)(MassBase(step))
op = None
return result
#: Synonym of :py:func:`calculate`.
expr = calculate