Context-specific metabolic model from transcriptomics data#

import numpy as np
import pandas as pd

import corneto as cn

cn.info()
Installed version:v1.0.0a0 (latest: v1.0.0-alpha)
Available backends:CVXPY v1.5.3, PICOS v2.4.17
Default backend (corneto.opt):CVXPY
Installed solvers:CLARABEL, CVXOPT, ECOS, ECOS_BB, GLPK, GLPK_MI, OSQP, SCIP, SCIPY, SCS
Graphviz version:v0.20.3
Repository:https://github.com/saezlab/corneto
from cobra.io import read_sbml_model

model = read_sbml_model("../../tests/gem/ecoli_core.zip")
G = cn.Graph.from_cobra_model(model)
G.shape
(77, 91)
df_expr = pd.read_csv("../../tests/gem/ecoli_example_data.zip")
df_expr
gene raw_value_1 raw_value_2 raw_value_3 raw_value_4 value
0 b0008 11.211265 12.030984 12.017371 11.674179 11.733450
1 b0114 11.767118 11.581649 11.885833 11.571045 11.701411
2 b0115 11.020531 10.813589 11.144573 11.362138 11.085208
3 b0116 10.299068 10.562963 10.788094 10.425459 10.518896
4 b0118 6.116122 6.437554 6.956686 6.229419 6.434945
... ... ... ... ... ... ...
131 b4153 9.647255 10.089110 8.685851 8.902372 9.331147
132 b4154 9.176739 9.387545 8.841818 8.510611 8.979178
133 b4232 9.490210 9.201019 9.321175 9.235511 9.311979
134 b4301 2.335597 2.338372 2.317967 2.270161 2.315524
135 b4395 2.431005 2.827751 2.822822 2.825172 2.726687

136 rows × 6 columns

from corneto.methods.metabolism import evaluate_gpr_expression, get_unique_genes

genes = get_unique_genes(G, startswith="b")
len(genes)
101
df_expr[["value"]].hist(bins=30)
array([[<Axes: title={'center': 'value'}>]], dtype=object)
../_images/27611de4a4c734e2de0efeed5b2473658f64b30fc94a6db3c07f3e877900f51e.png
df_expr["high"] = (df_expr["value"] >= 8).astype(int)
df_expr["low"] = (df_expr["value"] <= 4).astype(int)
df_expr["score"] = df_expr["high"] - df_expr["low"]
df_expr
gene raw_value_1 raw_value_2 raw_value_3 raw_value_4 value high low score
0 b0008 11.211265 12.030984 12.017371 11.674179 11.733450 1 0 1
1 b0114 11.767118 11.581649 11.885833 11.571045 11.701411 1 0 1
2 b0115 11.020531 10.813589 11.144573 11.362138 11.085208 1 0 1
3 b0116 10.299068 10.562963 10.788094 10.425459 10.518896 1 0 1
4 b0118 6.116122 6.437554 6.956686 6.229419 6.434945 0 0 0
... ... ... ... ... ... ... ... ... ...
131 b4153 9.647255 10.089110 8.685851 8.902372 9.331147 1 0 1
132 b4154 9.176739 9.387545 8.841818 8.510611 8.979178 1 0 1
133 b4232 9.490210 9.201019 9.321175 9.235511 9.311979 1 0 1
134 b4301 2.335597 2.338372 2.317967 2.270161 2.315524 0 1 -1
135 b4395 2.431005 2.827751 2.822822 2.825172 2.726687 0 1 -1

136 rows × 9 columns

values = df_expr[["gene", "score"]].set_index("gene").to_dict()["score"]
e = evaluate_gpr_expression(G.get_attr_from_edges("GPR"), values)
from corneto.methods.metabolism.fba import multicondition_imat

P = multicondition_imat(G, np.array(e))
P.solve(solver="SCIP")
Problem(Minimize(Expression(AFFINE, UNKNOWN, ())), [Inequality(Constant(CONSTANT, UNKNOWN, (91,))), Inequality(Variable((91,), _flow)), Equality(Expression(AFFINE, UNKNOWN, (77,)), Constant(CONSTANT, ZERO, ())), Inequality(Expression(AFFINE, NONNEGATIVE, (91,))), Equality(Expression(AFFINE, NONNEGATIVE, (36,)), Constant(CONSTANT, ZERO, ())), Inequality(Expression(AFFINE, UNKNOWN, (91,))), Inequality(Variable((91,), _flow)), Inequality(Expression(AFFINE, NONNEGATIVE, (91,))), Inequality(Variable((91,), _or, boolean=True))])
df_sol = pd.DataFrame(
    P.expr.flow.value, index=G.get_attr_from_edges("id"), columns=["flux"]
)
df_sol
flux
EX_ac_b 7.898471
EX_akg_b 0.000000
EX_co2_b 0.000000
EX_etoh_b 8.360426
EX_for_b 17.263878
... ...
TALA 0.043172
THD2 3.595203
TKT1 0.043172
TKT2 -0.043172
TPI 9.676129

91 rows × 1 columns

selected_reactions = df_sol[df_sol.flux.abs() >= 1e-6].index.values.tolist()
len(selected_reactions)
59
sum(P.expr._flow_ipos.value + P.expr._flow_ineg.value)
60.0