Metabolite Enrichment Analysis
A. Dugourd
12/01/2023
Source:vignettes/metabolic_pathway_enrichment.Rmd
metabolic_pathway_enrichment.Rmd
Tutorial with a kidney cancer toy metabolomic dataset
library(ocean)
library(decoupleR)
library(dplyr)
Differential analysis
unique(toy_targets$condition)
comparisons <- list('tumorVsHealthy' = c(1,-2))
limmaRes <- runLimma(measurements = toy_metabolomic_data,
targets = toy_targets,
comparisons = comparisons)
Format differential analysis result
t_table <- ttop_list_to_t_table(
limma_res_to_ttop_list(limma_res = limmaRes,
comp_names = names(comparisons),
number = length(toy_metabolomic_data[,1]),
adjust.method = "fdr"))
t_table <- t_table_metactivity_input_formater(
metabolomic_t_table = t_table,
mapping_table = mapping_table,
affixes = c("c","l","x","m","e","n","r")
)
Select pathways
Select all pathways from recon2_redhuman$pathway
to be
included in network
all_pathways <- as.vector(unique(recon2_redhuman$pathway))
all_pathways <- all_pathways[!is.na(all_pathways)]
Create a reaction network from the recon_redhuman model (metabolites & enzymes)
reaction_network <- model_to_pathway_sif(pathway_to_keep = all_pathways$X1)
Translate enzyme complexes by mapping identifiers to names
reaction_network <- translate_complexes(reaction_network)
Create data frame metabolite - affiliated enzyme instead of source-target
metabolites <- rearrange_dataframe(reaction_network)
Remove compartment information and “cpd:” to get pure KEGG IDs
metabolites$metabolites <- get_pure_kegg_ids(metabolites$metabolites)
metabolites <- distinct(metabolites) #keep only unique rows
Map pathways to metabolites
metabolites_pathway_df <- map_pathways_to_metabolites(metabolites)
Save metabolites-pathways data frame
#write.csv(metabolites_pathway_df, "./results/metabolites_pathway_df.csv")
Metabolite enrichment analysis with package decoupleR
Calculate the activity enrichment score and p-value of all pathways by using the conditions in the matrix (patient samples vs metabolite expression) by calculating the mean over the expression of all metabolites.
Prepare input data
network <- metabolites_pathway_df #input 1: pathways and their associated metabolites
network$mor <- 1 #add new column for mode of regulation
# network$likelihood <- 1 #add new column for edge likelihood
t_table_kegg <- t_table
t_table_kegg$KEGG <- get_pure_kegg_ids(t_table_kegg$KEGG) #remove compartment info
t_table_kegg <- unique(t_table_kegg)
row.names(t_table_kegg) <- t_table_kegg$KEGG #convert kegg column to row names
t_table_kegg$KEGG <- NULL #delete kegg ids column
t_table_kegg <- unique(t_table_kegg) #ensure only 1 kegg is mapped to 1 metabolite
mat <- as.matrix(t_table_kegg) #input 2: expression matrix
Perform enrichment analysis
enrichment <- run_wmean(mat, network,
.source = .data$pathway,
.target = .data$metabolites,
.mor = .data$mor,
# .likelihood = .data$likelihood,
times = 1000, minsize = 5) #randomize matrix
enrichment$condition <- NULL
enrichment <- as.data.frame(enrichment)
colnames(enrichment) <- c("statistic", "pathway", "score", "p-value")
Keep only rows with statistic “normalized_mean”
enrichment_norm <- enrichment[enrichment$statistic == "norm_wmean", ]
Visualize most significant pathways in a bar plot
score <- 1.0 #define cutoff score
barplot = plot_significant_pathways(enrichment_norm, score)
barplot
#ggsave("enriched_pathways.png", plot = barplot,
# path = "results/", scale = 1, dpi = 300, limitsize = TRUE)
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.37 desc_1.4.3 R6_2.5.1 fastmap_1.2.0
## [5] xfun_0.49 cachem_1.1.0 knitr_1.48 htmltools_0.5.8.1
## [9] rmarkdown_2.29 lifecycle_1.0.4 cli_3.6.3 sass_0.4.9
## [13] pkgdown_2.1.1 textshaping_0.4.0 jquerylib_0.1.4 systemfonts_1.1.0
## [17] compiler_4.4.2 tools_4.4.2 ragg_1.3.3 evaluate_1.0.1
## [21] bslib_0.8.0 yaml_2.3.10 jsonlite_1.8.9 rlang_1.1.4
## [25] fs_1.6.5 htmlwidgets_1.6.4