#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This file is part of the `lipyd` python module
#
# Copyright (c) 2015-2017 - EMBL
#
# File author(s): Dénes Türei (turei.denes@gmail.com)
#
# Distributed under the GNU GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# http://www.gnu.org/licenses/gpl-3.0.html
#
# Website: http://www.ebi.ac.uk/~denes
#
import os
import re

import numpy as np
import pandas as pd
class Results(object):
    """
    Compiles manually annotated and programmatic identification results
    into `pandas.DataFrame` objects, exports and plots them.

    NOTE(review): the methods below read attributes and call helpers which
    are not defined in this class (`manualdir`, `manualend`, `read_xls`,
    `read_lipid_names`, `lipnames`, `headgroup_from_lipid_name`,
    `recount1`, `recount2`, `recount3`, `df_header`, `valids`) and use
    module level names which are not imported in this file (`fa_greek`,
    `altair`, `plotly`, `go`, `pl`). Presumably these are provided by the
    class and module this code has been factored out from -- confirm
    before use.
    """

    def __init__(self):
        """
        Creates an empty instance; no attributes are set here.
        """
        pass

    def read_manual(self):
        """
        Reads adequate columns from manually annotated tables.

        Every file in `self.manualdir` whose name ends with
        `self.manualend` is processed; the protein name is the part of the
        file name before the first underscore. The sheets
        ``<protein>_negative_best`` and ``<protein>_positive_best`` are
        read by `self.read_xls`, their first row is skipped.

        Provides data in the `manual` attribute: a dict with protein names
        as keys and dicts as values, the latter having the keys ``'neg'``
        and ``'pos'`` and lists of 12 element records as values.
        """
        reclass = re.compile(r'(^[IV]*\.?[0-9]?).*')

        def read_line(l, protein, mode):
            # rows with empty result class, intensity or corrected m/z
            # yield None, which `read_table` drops
            if not (len(l[17]) and len(l[12]) and len(l[13])):
                return None
            rt_range = l[4].split('-')
            return [
                float(l[13]),                             # m/z corrected
                reclass.match(l[17]).groups()[0],         # result class
                l[14],                                    # SwissLipids name
                l[15],                                    # main headgroup class
                int(float(l[12])),                        # intensity
                float(l[2]),                              # m/z original
                protein,                                  # protein name
                mode,                                     # ion mode
                float(l[5]),                              # RT mean
                float(l[6]) if l[6] != 'NA' else np.nan,  # RT MS2 closest
                float(rt_range[0].strip()),               # RT lower
                float(rt_range[1].strip()),               # RT greater
            ]

        def read_table(tbl, protein, mode):
            records = (read_line(l, protein, mode) for l in tbl)
            return [rec for rec in records if rec is not None]

        data = {}
        fnames = [
            f for f in os.listdir(self.manualdir)
            if f.endswith(self.manualend)
        ]
        for f in fnames:
            protein = f.split('_')[0]
            xlsname = os.path.join(self.manualdir, f)
            tblneg = self.read_xls(
                xlsname, sheet='%s_negative_best' % protein)
            tblpos = self.read_xls(
                xlsname, sheet='%s_positive_best' % protein)
            data[protein] = {
                'neg': read_table(tblneg[1:], protein, 'neg'),
                'pos': read_table(tblpos[1:], protein, 'pos'),
            }
        self.manual = data

    def read_manual2(self, fname='All_results_v04.xlsx'):
        """
        Reads manually annotated results from Marco's final table.

        The sheet ``'final data'`` of `fname` is read by `self.read_xls`.
        The second column is split at the underscore into protein name and
        ion mode (first 3 characters kept). Rows whose third column starts
        with ``'Qual'`` are skipped, but their protein and mode still get
        an (possibly empty) entry.

        Provides data in the `manual` attribute, in the same layout as
        `read_manual` does.
        """
        result = {}
        reclass = re.compile(r'(^[IV]*\.?[0-9]?).*')
        tbl = self.read_xls(fname, sheet='final data')
        for l in tbl:
            protein, mode = l[1].split('_')
            mode = mode[:3]
            records = result.setdefault(protein, {}).setdefault(mode, [])
            if l[2].strip()[:4] == 'Qual':
                continue
            rt_range = l[6].split('-')
            records.append([
                float(l[15]),                             # m/z corrected
                reclass.match(l[19]).groups()[0],         # result class
                l[16].replace(u'−', '-'),                 # SwissLipids name
                l[17].replace(u'−', '-'),                 # main headgroup class
                int(float(l[14])),                        # intensity
                float(l[4]),                              # m/z original
                protein,                                  # protein name
                mode,                                     # ion mode
                float(l[7]),                              # RT mean
                float(l[8]) if l[8] != 'NA' else np.nan,  # RT MS2 closest
                float(rt_range[0].strip()),               # RT lower
                float(rt_range[1].strip()),               # RT greater
            ])
        self.manual = result

    def manual_df(self, screen='A', only_swl_col=False):
        """
        Creates a pandas dataframe from manual results.

        If `self.manual` is missing or None it is loaded by
        `read_manual2`; if `self.lipnames` is missing or None
        `read_lipid_names` is called. The result is stored in the
        `pmanual` attribute with `self.df_header` as column names.

        :param str screen: Label appended to each row as the last column.
        :param bool only_swl_col: Always take the lipid names from the
            SwissLipids name column, never from the manually assigned
            headgroup column.

        Each record of `self.manual` yields one row for each lipid species
        (and fatty acid combination) recognized in its name fields.
        Note: headgroup labels in `self.manual` which are keys of the
        second synonym table are replaced in place.
        """
        # synonyms for the headgroups parsed from names
        shgs = {
            'Monoalkylmonoacylglycerol-O': 'DAG-O',
            'hydroquinone?': 'HQ',
            'Ganglioside GM1': 'GM1',
            'Ganglioside GM2': 'GM2',
            'Ganglioside GM3': 'GM3',
            'Ganglioside GM4': 'GM4',
            'Ganglioside GA1': 'GA1',
            'Lysophosphatidylethanolamine': 'LysoPE',
            'alpha-tocopherol metabolite': 'VE',
            'alpha-tocopherol': 'VE',
            (
                'Retinol {calculated as -H2O adduct '
                'is not in applied database}'
            ): 'VA',
            'docosapentaenoate': 'PUFA',
            'octacosatetraenoate': 'PUFA',
            'octacosapentaenoate': 'PUFA',
            'octadecatetraenoate': 'PUFA',
            'octatriacontatetraenoate': 'PUFA',
            'tetracosapentaenoate': 'PUFA',
            'hexacosatetraenoate': 'PUFA',
            'hexacosanoate': 'PUFA',
            'dotriacontapentaenoate': 'PUFA',
            'Sterol ester': 'SE',
            'nothing': 'NA',
            'unknown': 'NA',
            'Monoalkylglycerol-O': 'MAG-O',
            'Monoalkyldiacylglycerol-O': 'TAG-O',
            'Dihexosyldiacylglycerol': 'HexDAG',
            'Monohexosyldiacylglycerol': 'HexDAG',
            'Monoalkylmonoacylglycerol': 'DAG-O',
            '24-Hydroxy-19-norgeminivitamin D3': 'VD',
            'NP40': 'P40',
            'Cer1P': 'CerP',
            'Phosphatidylcholine': 'PC',
            'Phosphatidylethanolamine': 'PE',
            'Phosphatidylcholine-O': 'PC-O',
            'Phosphatidylethanolamine-O': 'PE-O',
            'Dihexosyl ceramide': 'Hex2Cer',
            'Sulfodihexosyl ceramide': 'SHex2Cer',
            'BMP / PG': 'PG/BMP',
            'BMP/PG': 'PG/BMP',
            'Lyso-O-PE': 'LysoPE-O',
            'Lyso-O-PC': 'LysoPC-O',
            'Lyso-O-PG': 'LysoPG-O',
            'Lyso-O-PS': 'LysoPS-O',
            'LysoO-PE': 'LysoPE-O',
            'LysoO-PG': 'LysoPG-O',
            'LysoO-PC': 'LysoPC-O',
            'LysoO-PS': 'LysoPS-O',
            'lysoPG': 'LysoPG',
            'Hex-Cer': 'HexCer',
        }
        # synonyms applied on the manually assigned headgroup labels
        shgs2 = {
            'Ganglioside': 'GM',
            'Vit.A1': 'VA',
            'Vit. E metabolite': 'VE',
            'SulfohexCer': 'SHexCer',
            'SulfoHexCer': 'SHexCer',
            'Sulfo HexCer': 'SHexCer',
            'SulfodihexCer': 'SHex2Cer',
            'DiHexCer-OH': 'Hex2CerOH',
            'DiHexCer': 'Hex2Cer',
            'PI2xP': 'PIP2',
            'MAMAG': 'DAG-O',
        }
        # synonyms applied on the final ("unified") headgroup
        uhgs = {
            'Hex2Cer': 'Hex2Cer',
            'Hex2Cer-OH': 'Hex2CerOH',
            'HexCer-OH': 'HexCerOH',
            'GM3': 'GM',
            'Detergent': 'P40',
            'Hex-Cer': 'HexCer',
        }
        bmp_pg = set(['BMP', 'PG'])
        no_chains = [
            '', np.nan, np.nan,
            '', np.nan, np.nan,
            '', np.nan, np.nan,
        ]

        def get_names(swl_names, manual_names):
            """
            Extracts the lipid names and carbon counts
            from the SwissLipids IDs field.

            Returns a list of 16 element lists:
            [0] parsed headgroup, [1] ``'Lyso'`` or empty string,
            [2:5] prefix, total carbon count and unsaturation,
            [5:14] prefix, carbon count and unsaturation of up to
            3 fatty acids, [14] full headgroup, [15] manual name.
            Records with full headgroup ``'NA'`` are returned only
            if nothing else has been found.
            """
            something = []
            nothing = []

            # fixing typos and inconsistent naming:
            manual_name_1 = manual_names.strip().split('(')[0]
            manual_name_1_l = manual_name_1.lower().strip()
            if manual_name_1_l in ('ambiguous', 'ambigous'):
                manual_name_1 = 'ambiguous'
            if manual_name_1_l in ('unknown', 'unkown'):
                manual_name_1 = 'NA'
            if not manual_name_1_l:
                manual_name_1 = 'NA'
            # :done

            manual_hgs = set(
                x.split('(')[0].strip() for x in manual_names.split(',')
            )

            def swl_headgroups():
                # evaluated only if the manual names do not decide
                return set(
                    self.headgroup_from_lipid_name(['S', None, xx])[0]
                    for x in swl_names.split(r'///')
                    for xx in x.split(';')
                )

            # testing if there is PG/BMP ambiguity:
            # both PG and BMP are listed in one of the name fields
            pg_bmp = (
                bmp_pg <= manual_hgs or
                bmp_pg <= swl_headgroups()
            )
            # nothing else than PG and BMP is listed in one of them
            only_pg_bmp = (
                manual_hgs <= bmp_pg or
                swl_headgroups() <= (bmp_pg | set([None]))
            )

            use_swl_col = (
                only_swl_col or
                manual_name_1 in ('NA', 'ambiguous', 'adduct')
            )
            # at Enric we use the manual names column:
            name_groups = (
                swl_names.split(r'///')
                if use_swl_col else
                manual_names.split(',')
            )

            for lips in name_groups:
                lips = lips.strip()
                for lip in lips.split(';'):
                    lip = lip.strip()
                    if lip and lip[0] == '(':
                        lip = lip[1:]
                    if 'lyso' in lip.lower():
                        lyso = 'Lyso'
                        manual_name_1 = manual_name_1.replace('yso-', 'yso')
                    else:
                        lyso = ''
                    swl_parsed = self.headgroup_from_lipid_name(
                        ['S', None, lip])[0]
                    if pg_bmp:
                        if lips[:2] == 'PG' or swl_parsed == 'PG':
                            # at PG we replace with PG/BMP
                            lips = lips.replace('PG', 'PG/BMP')
                            swl_parsed = 'PG/BMP'
                            if only_pg_bmp:
                                manual_name_1 = 'PG/BMP'
                        elif lips[:3] == 'BMP' or swl_parsed == 'BMP':
                            # at BMP we skip
                            continue
                    if swl_parsed is None:
                        # headgroup not recognized:
                        # falling back to the raw name
                        swl_parsed = lip
                        if ']' in swl_parsed:
                            swl_parsed = swl_parsed.split(']')[1]
                        swl_parsed = swl_parsed.split('(')[0].strip()
                        if ':' in swl_parsed:
                            swl_parsed = swl_parsed.split(':')[1].strip()
                        if 'nothing' in swl_parsed:
                            swl_parsed = 'NA'
                    # regex finds the total carbon count
                    cc1 = self.recount1.findall(lip)
                    # special case if the fatty acid
                    # name is greek name
                    if swl_parsed in fa_greek:
                        cc1 = [(
                            '',
                            fa_greek[swl_parsed][0],
                            fa_greek[swl_parsed][1],
                        )]
                        swl_parsed = 'FA'
                    # a full headgroup name:
                    fullhg = '%s%s%s' % (
                        lyso if not swl_parsed.startswith('Lyso') else '',
                        swl_parsed,
                        '-O' if len(cc1) and cc1[0][0] == 'O' else '',
                    )
                    # regex finds 2-3 fatty acids
                    cc2s = self.recount3.findall(lip)
                    # the total carbon count
                    if len(cc1):
                        ccpart = [
                            cc1[0][0], int(cc1[0][1]), int(cc1[0][2])
                        ]
                    else:
                        ccpart = ['', np.nan, np.nan]
                    # carbon counts of fatty acids:
                    # if this is a Lyso species or single fatty acid
                    # we have only one cc:unsat
                    # otherwise we must have at least 2
                    single_chain = swl_parsed == 'FA' or bool(lyso)
                    with_chains = [
                        cc2 for cc2 in cc2s
                        if cc2[4] or single_chain
                    ]
                    if with_chains:
                        faccparts = [
                            [
                                # FA1
                                cc2[0],
                                int(cc2[1]),
                                int(cc2[2]),
                                # FA2
                                cc2[3],
                                int(cc2[4]) if cc2[4] else np.nan,
                                int(cc2[5]) if cc2[5] else np.nan,
                                # FA3
                                cc2[6],
                                int(cc2[7]) if cc2[7] else np.nan,
                                int(cc2[8]) if cc2[8] else np.nan,
                            ]
                            for cc2 in with_chains
                        ]
                    else:
                        faccparts = [list(no_chains)]
                    for faccpart in faccparts:
                        if cc2s and not cc1:
                            # no total count in the name:
                            # summing up the fatty acids
                            ccpart = [
                                faccpart[0],
                                np.nansum([
                                    faccpart[1], faccpart[4], faccpart[7]
                                ]),
                                np.nansum([
                                    faccpart[2], faccpart[5], faccpart[8]
                                ]),
                            ]
                        res = [swl_parsed, lyso]
                        res.extend(ccpart)
                        res.extend(faccpart)
                        res.append(fullhg)
                        res.append(manual_name_1)
                        if res[14] == 'NA':
                            nothing.append(res)
                        else:
                            something.append(res)

            return something or nothing

        def get_uhg(cnt, counts):
            """
            Selects the final headgroup for one record of `get_names`:
            the last element unless that is ``'ambiguous'`` or
            ``'adduct'``, in which case the one before the last.
            """
            if cnt[-1] not in ['ambiguous', 'adduct']:
                uhg = cnt[-1]
            else:
                uhg = cnt[-2]
            if uhg in uhgs:
                uhg = uhgs[uhg]
            uhg = uhg.replace('-O-', '-O')
            if uhg == 'NA' and (
                len(counts) == 1 or
                len(set(c[14] for c in counts)) == 1
            ):
                # all records agree in the full headgroup
                uhg = cnt[14]
            return uhg

        if not hasattr(self, 'manual') or self.manual is None:
            self.read_manual2()
        if not hasattr(self, 'lipnames') or self.lipnames is None:
            self.read_lipid_names()

        result = []
        for protein, d in self.manual.items():
            for mode, tbl in d.items():
                for i, l in enumerate(tbl):
                    counts = get_names(l[2], l[3])
                    # this modifies the record in `self.manual`
                    if l[3].strip() in shgs2:
                        l[3] = shgs2[l[3].strip()]
                    # 12 cols: protein -- rtup
                    res = (
                        [protein, mode, i, l[0], l[5], l[4], l[1], l[3]] +
                        l[8:12]
                    )
                    for cnt in counts:
                        res1 = res[:]
                        # NOTE(review): cnt[1] holds 'Lyso' or '', hence
                        # this never seems to be true; maybe the prefix
                        # in cnt[2] was meant -- confirm
                        if cnt[1] == 'O':
                            cnt[0] = '%s-O' % cnt[0]
                        if cnt[0].strip() in shgs:
                            cnt[0] = shgs[cnt[0].strip()]
                        cnt[-1] = cnt[-1].strip()
                        for hgi in [-1, -2]:
                            if cnt[hgi] in shgs:
                                cnt[hgi] = shgs[cnt[hgi]]
                            if cnt[hgi] in shgs2:
                                cnt[hgi] = shgs2[cnt[hgi]]
                        # [16]: the final headgroup
                        cnt.append(get_uhg(cnt, counts))
                        hg_label = cnt[16] if cnt[16] != 'NA' else cnt[14]
                        # [17], [18]: total carbon count and unsaturation
                        if not np.isnan(cnt[3]) and not np.isnan(cnt[4]):
                            cnt.append(
                                '%s(%u:%u)' % (hg_label, cnt[3], cnt[4]))
                            cnt.append('%u:%u' % (cnt[3], cnt[4]))
                        else:
                            cnt.append('NA')
                            cnt.append('NA')
                        # [19], [20]: the fatty acids
                        facc = []
                        for j in (6, 9, 12):
                            if (
                                not np.isnan(cnt[j]) and
                                not np.isnan(cnt[j + 1])
                            ):
                                facc.append((cnt[j], cnt[j + 1]))
                        facc = '/'.join(
                            '%u:%u' % cc for cc in sorted(facc))
                        if len(facc):
                            cnt.append('%s(%s)' % (hg_label, facc))
                            cnt.append(facc)
                        else:
                            cnt.append('NA')
                            cnt.append('NA')
                        res1.extend(cnt)
                        res1.append(screen)
                        result.append(res1)

        self.pmanual = pd.DataFrame(result, columns=self.df_header)

    def auto_df(self, screen_name='E'):
        """
        Compiles a data frame in the same format as `manual_df`
        just from the programmatic results.

        Iterates the features in `self.valids` having true value in the
        ``'good'`` array and creates one row for each of their
        identifications in ``'cid'``. The result is stored in the `pauto`
        attribute with `self.df_header` as column names.

        :param str screen_name: Label appended to each row as the last
            column.
        """
        no_chains = [
            '', np.nan, np.nan,
            '', np.nan, np.nan,
            '', np.nan, np.nan,
        ]
        result = []
        for protein, d in self.valids.items():
            for mode, tbl in d.items():
                # index of the feature among the selected ones
                ii = 0
                for i, incl in enumerate(tbl['good']):
                    if not incl:
                        continue
                    mz = tbl['mz'][i]
                    oi = tbl['i'][i]
                    idlevel = tbl['idlevel'][oi]
                    intensity = round(tbl['aaa'][i])
                    rt = tbl['rt'][i, :]
                    rtmean = np.mean(rt)
                    rtms2 = tbl['ms2rt'][i]
                    cids = tbl['cid'][oi]
                    clm = None
                    if not cids:
                        clm = 'unknown'
                        cids = ['unknown']
                    elif len(cids) > 1:
                        clm = 'ambiguous'
                    for lip in cids:
                        lyso = 'Lyso' if 'lyso' in lip.lower() else ''
                        hg = 'NA' if clm == 'unknown' else lip.split('(')[0]
                        pref = 'O' if '-O' in hg else ''
                        if 'Cer' in hg:
                            pref = 'd' if 'CerOH' not in hg else 't'
                        # NOTE(review): here `hg` already contains '-O',
                        # so this appends a second one -- confirm
                        if pref == 'O':
                            hg = '%s-O' % hg
                        hg0 = (
                            hg.replace('-O', '')
                            .replace('CerOH', 'Cer')
                            .replace('Cer1P', 'CerP')
                        )
                        res = [
                            protein, mode, ii, mz, mz, intensity, idlevel,
                            hg, rtmean, rtms2, rt[0], rt[1], hg0, lyso,
                        ]
                        cc = self.recount3.findall(lip)
                        if cc:
                            first = cc[0]
                            sumcc = sum(
                                int(first[k]) if first[k] else 0
                                for k in (1, 4, 7)
                            )
                            sumuns = sum(
                                int(first[k]) if first[k] else 0
                                for k in (2, 5, 8)
                            )
                            res.extend([pref, sumcc, sumuns])
                            hgcc = '%s(%u:%u)' % (hg, sumcc, sumuns)
                            sumccuns = '%u:%u' % (sumcc, sumuns)
                            fa = '%s:%s' % (first[1], first[2])
                            if first[4]:
                                fa = '%s/%s:%s' % (fa, first[4], first[5])
                            if first[7]:
                                fa = '%s/%s:%s' % (fa, first[7], first[8])
                        else:
                            res.extend(['', np.nan, np.nan])
                            hgcc = 'NA'
                            sumccuns = 'NA'
                            fa = 'NA'
                        # fatty acids are reported only if at least 2
                        # are known, or at single chain species
                        if (
                            cc and
                            cc[0][1] and
                            cc[0][2] and (
                                cc[0][4] or hg == 'FA' or lyso
                            )
                        ):
                            first = cc[0]
                            res.extend([
                                first[0],
                                int(first[1]),
                                int(first[2]),
                                first[3],
                                int(first[4]) if first[4] else np.nan,
                                int(first[5]) if first[5] else np.nan,
                                first[6],
                                int(first[7]) if first[7] else np.nan,
                                int(first[8]) if first[8] else np.nan,
                            ])
                        else:
                            res.extend(no_chains)
                        res.extend([
                            hg,
                            hg if clm is None else clm,
                            'NA' if clm == 'unknown' else hg,
                            hgcc,
                            sumccuns,
                            '%s(%s)' % (hg, fa),
                            fa,
                            screen_name,
                        ])
                        result.append(res)
                    ii += 1
        self.pauto = pd.DataFrame(result, columns=self.df_header)

    def headgroups_cross_screening(self, label1='Screen1',
                                   label2='Screen2',
                                   idlevels=set(['I']),
                                   outfile='headgroups_%s_%s.tab'):
        """
        Does a quick comparison at headgroup/protein level
        between 2 screenings.

        The first screening is the manual data frame (`manual_df` is
        called here), the second is `self.valids`. The result is stored
        in the `cross_screen_hg` attribute and written into a tab
        separated file.

        :param str label1: Label of the first (manual) screening.
        :param str label2: Label of the second screening.
        :param set idlevels: Result classes / identification levels
            to be considered.
        :param str outfile: File name template, formatted with the
            two labels.
        """
        self.manual_df()
        result = []
        for protein, d in self.valids.items():
            for mode, tbl in d.items():
                selected = self.pmanual[
                    (self.pmanual.protein == protein) &
                    (self.pmanual.ionm == mode) &
                    self.pmanual.cls.isin(idlevels)
                ]
                s1_hg = set(
                    hg.replace('-O', '') for hg in selected['uhgroup']
                )
                s2_hg = set(
                    hgfa.split('(')[0]
                    for j, oi in enumerate(tbl['i'])
                    if tbl['slobb'][j] and tbl['idlevel'][oi] in idlevels
                    for hgfa in tbl['cid'][oi]
                )
                key = '%s-%s' % (protein, mode)
                for hg12 in (s2_hg & s1_hg):
                    result.append(
                        [key, hg12, '%s_%s' % (label1, label2)])
                for hg1 in (s1_hg - s2_hg):
                    result.append([key, hg1, label1])
                for hg2 in (s2_hg - s1_hg):
                    result.append([key, hg2, label2])
        outfile = outfile % (label1, label2)
        hdr = ['protein_mode', 'hg', 'found_in']
        self.cross_screen_hg = result
        with open(outfile, 'w') as fp:
            fp.write('%s\n' % '\t'.join(hdr))
            fp.write('\n'.join('\t'.join(row) for row in result))

    def bubble_altair(self,
                      classes=['I', 'II'],
                      subtitle='',
                      main_title=''):
        """
        Creates an altair chart of carbon count vs. unsaturation from the
        rows of `self.pmanual` belonging to the result classes `classes`,
        faceted by protein and ion mode.

        `subtitle` and `main_title` are not used here.
        Returns the chart object.
        """
        # select the classes
        data = self.pmanual[self.pmanual.cls.isin(classes)]
        data = data.sort_values(by=['protein', 'ionm'])
        return altair.Chart(data).mark_point().encode(
            row='protein',
            column='ionm',
            size='Intensity:average(intensity)',
            x=altair.X(
                'unsat', axis=altair.Axis(title='Unsaturated count')),
            y=altair.Y(
                'carb', axis=altair.Axis(title='Carbon count')),
        )

    @staticmethod
    def export_df(df, fname, **kwargs):
        """
        Exports the results from a `pandas.DataFrame` to csv.

        :param pandas.DataFrame df: The data frame to export.
        :param fname: Passed to `DataFrame.to_csv` as target.
        :param kwargs: Passed to `DataFrame.to_csv`. By default fields
            are separated by tab, missing values are written as ``NaN``
            and the index is omitted.
        """
        kwargs.setdefault('sep', '\t')
        kwargs.setdefault('na_rep', 'NaN')
        kwargs.setdefault('index', False)
        df.to_csv(fname, **kwargs)

    def bubble_plotly(self,
                      classes=['I', 'II'],
                      subtitle='',
                      main_title=''):
        """
        Plots with plotly one bubble chart (carbon count vs. unsaturation,
        bubble size by summed intensity relative to the highest intensity)
        for each protein and ion mode, using the rows of `self.pmanual`
        belonging to the result classes `classes`.

        :param list classes: Result classes to include.
        :param str subtitle: Appended to each subplot title if not empty.
        :param str main_title: Title of the figure.
        """
        smodes = {'pos': '+', 'neg': '-'}
        # select the classes
        data = self.pmanual[self.pmanual.cls.isin(classes)]
        ncols = len(data.ionm.unique())
        unsat = np.arange(min(data.unsat), max(data.unsat) + 1)
        carb = np.arange(min(data.carb), max(data.carb) + 1)
        max_intensity = max(data.intensity)
        xlim = [min(unsat), max(unsat)]
        ylim = [min(carb), max(carb)]
        title_suffix = ', %s' % subtitle if len(subtitle) else ''
        nrows = 0
        subplot_titles = []
        traces = []
        for protein in sorted(data.protein.unique()):
            nrows += 1
            for mode in sorted(data.ionm.unique()):
                subplot_titles.append(
                    '%s%s%s' % (protein, smodes[mode], title_suffix))
                this_data = data[
                    (data.protein == protein) & (data.ionm == mode)
                ]
                vals = this_data.groupby(
                    ['carb', 'unsat'])['intensity'].sum()
                x, y, s = [], [], []
                for (ncarb, nunsat), total in vals.items():
                    x.append(nunsat)
                    y.append(ncarb)
                    s.append(total / float(max_intensity))
                traces.append(go.Scatter(
                    x=x, y=y,
                    mode='markers',
                    marker=dict(size=s, sizemode='area', sizeref=0.0001),
                    name='%s%s' % (protein, smodes[mode]),
                    fill='#333333',
                    showlegend=False,
                    xaxis=dict(range=xlim),
                    yaxis=dict(range=ylim),
                ))
        fig = plotly.tools.make_subplots(
            rows=nrows,
            cols=ncols,
            print_grid=False,
            subplot_titles=subplot_titles,
        )
        for i, trace in enumerate(traces):
            # traces fill the grid row by row
            irow, icol = divmod(i, ncols)
            fig.append_trace(trace, row=irow + 1, col=icol + 1)
        fig['layout'].update(
            height=nrows * 500, width=600, title=main_title,
            xaxis=dict(range=xlim), yaxis=dict(range=ylim))
        pl.iplot(fig, show_link=False)

    def piecharts_plotly(self, by_class=True,
                         main_title='Lipid classes by protein',
                         result_classes={'I'}):
        """
        Plots piecharts of detected lipids for each protein based on manually
        annotated results.
        Uses plotly, output accessible in Jupyter notebook.

        If `self.manual` is missing or None it is loaded by `read_manual`.

        :param bool by_class: Label the slices only by lipid class,
            without carbon counts.
        :param str main_title: Title of the figure; the result classes
            are appended to it.
        :param set result_classes: Only records of these result classes
            are counted.
        """
        def get_names(r, by_class=True):
            counts = []
            for lips in r[2].split('///'):
                for lip in lips.split(';'):
                    if 'nothing' in lip or not len(lip.strip()):
                        continue
                    cl = self.headgroup_from_lipid_name(
                        ['S', None, lip])[0]
                    if cl is None:
                        cl = lip.split('(')[0].strip()
                        if ':' in cl:
                            cl = cl.split(':')[1].strip()
                    if by_class:
                        cc = ''
                    else:
                        cc = self.recount2.findall(lip)
                        if not len(cc):
                            cc = self.recount1.findall(lip)
                        cc = cc[0] if len(cc) else '?'
                    counts.append(
                        '%s(%s)' % (cl, cc) if len(cc) else cl)
            return counts

        if not hasattr(self, 'manual') or self.manual is None:
            self.read_manual()
        if not hasattr(self, 'lipnames') or self.lipnames is None:
            self.read_lipid_names()

        main_title = '%s (class %s)' % (
            main_title, ', '.join(sorted(list(result_classes))))
        modes = {'pos': 'positive', 'neg': 'negative'}
        smodes = {'pos': '+', 'neg': '-'}
        nrows = int(np.ceil(len(self.manual) / 2.0))
        height = 500 * nrows
        annotations = []
        traces = []
        # serial number of the pie; 2 pies are placed in one row
        n = 0
        for protein in sorted(self.manual.keys()):
            for mode in ['neg', 'pos']:
                irow, icol = divmod(n, 2)
                # sum of intensities by label
                lab_val = {}
                for r in self.manual[protein][mode]:
                    if r[1].strip() in result_classes:
                        label = '/'.join(get_names(r, by_class=by_class))
                        if label not in lab_val:
                            lab_val[label] = 0.0
                        lab_val[label] += r[4]
                this_data = {}
                this_data['labels'], this_data['values'] = (
                    zip(*sorted(lab_val.items(), key=lambda i: i[0]))
                    if len(lab_val) else
                    (['None'], [1])
                )
                this_data['name'] = '%s %s, \nsum of intensities' % (
                    protein, modes[mode])
                this_data['type'] = 'pie'
                this_data['hole'] = 0.4
                this_data['hoverinfo'] = 'label+percent+name'
                this_data['domain'] = {
                    'x': [
                        icol / 2.0,
                        icol / 2.0 + 0.5,
                    ],
                    'y': [
                        1.0 - (0.48 / nrows * irow + 0.003),
                        1.0 - (0.48 / nrows * (irow + 1) - 0.003),
                    ],
                }
                traces.append(go.Pie(**this_data))
                # label in the middle of the pie
                annotations.append({
                    'font': {'size': 10},
                    'showarrow': False,
                    'text': '%s [%s]' % (protein, smodes[mode]),
                    'x': icol / 2.0 + 0.25,
                    'y': 1.0 - (
                        0.48 / nrows * irow + 0.48 / nrows / 2.0
                    ),
                    'xanchor': 'center',
                    'yanchor': 'middle',
                })
                n += 1
        layout = go.Layout(
            annotations=annotations,
            height=height,
            title=main_title,
        )
        fig = go.Figure(data=traces, layout=layout)
        pl.iplot(fig, show_link=False)