Skip to contents

Install ocEAn

library(devtools)
install_github("saezlab/ocean")

Tutorial with a kidney cancer toy metabolomic dataset

library(ocean)
library(decoupleR)
library(dplyr)

Differential analysis

unique(toy_targets$condition)
comparisons <- list('tumorVsHealthy' = c(1,-2))

limmaRes <- runLimma(measurements = toy_metabolomic_data,
                     targets = toy_targets,
                     comparisons = comparisons)

Format differential analysis result

t_table <- ttop_list_to_t_table(
  limma_res_to_ttop_list(limma_res = limmaRes,
                         comp_names = names(comparisons),
                         number = length(toy_metabolomic_data[,1]),
                         adjust.method = "fdr"))

t_table <- t_table_metactivity_input_formater(
  metabolomic_t_table = t_table,
  mapping_table = mapping_table,
  affixes = c("c","l","x","m","e","n","r")
)

Select pathways

Select all pathways from recon2_redhuman$pathway to be included in network

all_pathways <- as.vector(unique(recon2_redhuman$pathway))
all_pathways <- all_pathways[!is.na(all_pathways)]

Create a reaction network from the recon_redhuman model (metabolites & enzymes)

reaction_network <- model_to_pathway_sif(pathway_to_keep = all_pathways$X1)

Translate enzyme complexes by mapping identifiers to names

reaction_network <- translate_complexes(reaction_network)

Create data frame metabolite - affiliated enzyme instead of source-target

metabolites <- rearrange_dataframe(reaction_network)

Remove compartment information and “cpd:” to get pure KEGG IDs

metabolites$metabolites <- get_pure_kegg_ids(metabolites$metabolites)
metabolites <- distinct(metabolites)  #keep only unique rows

Map pathways to metabolites

metabolites_pathway_df <- map_pathways_to_metabolites(metabolites)

Save metabolites-pathways data frame

#write.csv(metabolites_pathway_df, "./results/metabolites_pathway_df.csv")

Metabolite enrichment analysis with package decoupleR

Calculate the activity enrichment score and p-value of all pathways by using the conditions in the matrix (patient samples vs metabolite expression) by calculating the mean over the expression of all metabolites.

Prepare input data

network <- metabolites_pathway_df    #input 1: pathways and their associated metabolites
network$mor <- 1                     #add new column for mode of regulation
# network$likelihood <- 1              #add new column for edge likelihood

t_table_kegg <- t_table
t_table_kegg$KEGG <- get_pure_kegg_ids(t_table_kegg$KEGG) #remove compartment info
t_table_kegg <- unique(t_table_kegg)
row.names(t_table_kegg) <- t_table_kegg$KEGG  #convert kegg column to row names
t_table_kegg$KEGG <- NULL            #delete kegg ids column
t_table_kegg <- unique(t_table_kegg) #ensure only 1 kegg is mapped to 1 metabolite
mat <- as.matrix(t_table_kegg)       #input 2: expression matrix

Perform enrichment analysis

enrichment <- run_wmean(mat, network,
                          .source = .data$pathway,
                          .target = .data$metabolites,
                          .mor = .data$mor, 
                          # .likelihood = .data$likelihood,  
                       times = 1000, minsize = 5)  #randomize matrix

enrichment$condition <- NULL
enrichment <- as.data.frame(enrichment)
colnames(enrichment) <- c("statistic", "pathway", "score", "p-value")

Keep only rows with statistic “normalized_mean”

enrichment_norm <- enrichment[enrichment$statistic == "norm_wmean", ]

Visualize most significant pathways in a bar plot

score <- 1.0  #define cutoff score
barplot = plot_significant_pathways(enrichment_norm, score)
barplot

#ggsave("enriched_pathways.png", plot = barplot,
#       path = "results/", scale = 1, dpi = 300, limitsize = TRUE)
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] knitr_1.41        magrittr_2.0.3    R6_2.5.1          ragg_1.2.5       
##  [5] rlang_1.0.6       fastmap_1.1.0     stringr_1.5.0     tools_4.2.2      
##  [9] xfun_0.36         cli_3.6.0         jquerylib_0.1.4   systemfonts_1.0.4
## [13] htmltools_0.5.4   yaml_2.3.6        digest_0.6.31     rprojroot_2.0.3  
## [17] lifecycle_1.0.3   pkgdown_2.0.7     textshaping_0.3.6 purrr_1.0.1      
## [21] sass_0.4.4        vctrs_0.5.1       fs_1.5.2          memoise_2.0.1    
## [25] glue_1.6.2        cachem_1.0.6      evaluate_0.19     rmarkdown_2.19   
## [29] stringi_1.7.12    compiler_4.2.2    bslib_0.4.2       desc_1.4.2       
## [33] jsonlite_1.8.4