Metabolite Enrichment Analysis

Install ocEAn

library(devtools)
install_github("saezlab/ocean")

Tutorial with a kidney cancer toy metabolomic dataset

library(ocean)
library(decoupleR)
library(dplyr)

Differential analysis

unique(toy_targets$condition)
comparisons <- list('tumorVsHealthy' = c(1,-2))

limmaRes <- runLimma(measurements = toy_metabolomic_data,
                     targets = toy_targets,
                     comparisons = comparisons)

Format differential analysis result

t_table <- ttop_list_to_t_table(
  limma_res_to_ttop_list(limma_res = limmaRes,
                         comp_names = names(comparisons),
                         number = length(toy_metabolomic_data[,1]),
                         adjust.method = "fdr"))

t_table <- t_table_metactivity_input_formater(
  metabolomic_t_table = t_table,
  mapping_table = mapping_table,
  affixes = c("c","l","x","m","e","n","r")
)

Select pathways

Select all pathways from recon2_redhuman$pathway to be included in network

all_pathways <- as.vector(unique(recon2_redhuman$pathway))
all_pathways <- all_pathways[!is.na(all_pathways)]

Create a reaction network from the recon_redhuman model (metabolites & enzymes)

reaction_network <- model_to_pathway_sif(pathway_to_keep = all_pathways$X1)

Translate enzyme complexes by mapping identifiers to names

reaction_network <- translate_complexes(reaction_network)

Create data frame metabolite - affiliated enzyme instead of source-target

metabolites <- rearrange_dataframe(reaction_network)

Remove compartment information and “cpd:” to get pure KEGG IDs

metabolites$metabolites <- get_pure_kegg_ids(metabolites$metabolites)
metabolites <- distinct(metabolites)  #keep only unique rows

Map pathways to metabolites

metabolites_pathway_df <- map_pathways_to_metabolites(metabolites)

Save metabolites-pathways data frame

#write.csv(metabolites_pathway_df, "./results/metabolites_pathway_df.csv")

Metabolite enrichment analysis with package decoupleR

Calculate the activity enrichment score and p-value of all pathways by using the conditions in the matrix (patient samples vs metabolite expression) by calculating the mean over the expression of all metabolites.

Prepare input data

network <- metabolites_pathway_df    #input 1: pathways and their associated metabolites
network$mor <- 1                     #add new column for mode of regulation
# network$likelihood <- 1              #add new column for edge likelihood

t_table_kegg <- t_table
t_table_kegg$KEGG <- get_pure_kegg_ids(t_table_kegg$KEGG) #remove compartment info
t_table_kegg <- unique(t_table_kegg)
row.names(t_table_kegg) <- t_table_kegg$KEGG  #convert kegg column to row names
t_table_kegg$KEGG <- NULL            #delete kegg ids column
t_table_kegg <- unique(t_table_kegg) #ensure only 1 kegg is mapped to 1 metabolite
mat <- as.matrix(t_table_kegg)       #input 2: expression matrix

Perform enrichment analysis

enrichment <- run_wmean(mat, network,
                          .source = .data$pathway,
                          .target = .data$metabolites,
                          .mor = .data$mor, 
                          # .likelihood = .data$likelihood,  
                       times = 1000, minsize = 5)  #randomize matrix

enrichment$condition <- NULL
enrichment <- as.data.frame(enrichment)
colnames(enrichment) <- c("statistic", "pathway", "score", "p-value")

Keep only rows with statistic “normalized_mean”

enrichment_norm <- enrichment[enrichment$statistic == "norm_wmean", ]

Visualize most significant pathways in a bar plot

score <- 1.0  #define cutoff score
barplot = plot_significant_pathways(enrichment_norm, score)
barplot

#ggsave("enriched_pathways.png", plot = barplot,
#       path = "results/", scale = 1, dpi = 300, limitsize = TRUE)

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.37     desc_1.4.3        R6_2.6.1          fastmap_1.2.0    
##  [5] xfun_0.53         cachem_1.1.0      knitr_1.50        htmltools_0.5.8.1
##  [9] rmarkdown_2.30    lifecycle_1.0.4   cli_3.6.5         sass_0.4.10      
## [13] pkgdown_2.1.3     textshaping_1.0.4 jquerylib_0.1.4   systemfonts_1.3.1
## [17] compiler_4.5.1    tools_4.5.1       ragg_1.5.0        evaluate_1.0.5   
## [21] bslib_0.9.0       yaml_2.3.10       jsonlite_2.0.0    rlang_1.1.6      
## [25] fs_1.6.6          htmlwidgets_1.6.4

A. Dugourd

12/01/2023