#' find files
#'
#' The function looks recursively inside a folder and find specific files. Used internally.
#'
#' @param path The folder to look into
#' @param reg_expr Regexp to look
#' @return A dataframe
#' @export
rich_find <- function(path = path, reg_expr){
# "report.*(pos|neg).*tsv"
# ".*png$"
files_reports <- data.frame(
file = fs::dir_ls(path, recurse = TRUE, regexp = reg_expr)
)
if(nrow(files_reports) == 0){
stop("No relevant files detected in the folder!")
}
# check the files at the same folder depth
splitted_files <- strsplit(files_reports$file, "/")
path_depth <- unlist(lapply(splitted_files, length))
# if all the path_dept are equal to the first, then we have as many TRUE as the vector elements.
if(!sum(path_depth == path_depth[[1]]) == length(path_depth)){
stop("Not all the file of that at the same path depth!!")
}
path_depth <- path_depth[[1]]
# easier to split by /, escape makes problems in separate
if (Sys.info()['sysname'] == "Windows"){
files_reports$file <- gsub("\\\\", "/", files_reports$file)
}
files_reports <- tidyr::separate(files_reports,
col = file,
into = as.character(1:path_depth[1]),
sep = "/",
remove = FALSE)
# we are subsetting by position so if the folder tree is different we are screwed
# we want to select the second to the last (contrast) and one to the last(gs)
# remove _ from them
# files_reports[path_depth - 2:1] <- apply(files_reports[path_depth - 2:1], 2, function(x) sub("_", "", x))
files_reports[, "id"] <- paste0(files_reports[, path_depth - 2], "~", files_reports[, path_depth - 1])
files_reports
}
#' rich_wider
#'
#' Given a dataframe generate by \code{\link{rich_aggregate}}, filters it to get a specific `gs`, it calculates
#' negative `log10(p.norm) * sign of NES`, filters out all `FDR.q.val > fdr_threshold` and
#' returns a wider datagrame with `pathways` as rows, `contrasts` as
#' column and one of the columns as the `value`. When a combination of contrast-pathway is not presence a 0 is substituted
#' to NA when pivoting.
#'
#' @param dat Dataframe as returned by \code{\link{rich_aggregate}}.
#' @param fdr_threshold Minimum `FDR.q.val` to keep.
#' @param gs Gene Set Database.
#' @param value Column from which to take the value for pivit wider. One of c("fdr_q_val", "nes", "nom_p_val", "n_logp_sign").
#' Defaut is "n_logp_sign".
#' @export
rich_wider <- function(dat,
fdr_threshold,
gs,
value = "n_logp_sign"){
# mr flick
defined <- ls()
passed <- names(as.list(match.call())[-1])
if (any(!defined %in% passed)) {
stop(paste("missing values for", paste(setdiff(defined, passed), collapse=", ")))
}
if(!is.numeric(fdr_threshold)){
stop("fdr_threshold is not numeric!")
}
if(!value %in% c("fdr_q_val", "nes", "nom_p_val", "n_logp_sign")){
stop("value needs to be one of c(`fdr_q_val`, `nes`, `nom_p_val`, `n_logp_sign`)!")
}
if(!gs %in% unique(dat$gs)){
stop("gs argument is not an among the elements of the column gs!!!")
}
dat$nom_p_val[dat$nom_p_val == 0 ] <- 10^-10
dat[ ,"n_logp_sign"] <- -log10(dat$nom_p_val) * sign(dat$nes)
dat <- dat[dat$gs == gs,]
dat <- dat[dat$fdr_q_val < fdr_threshold,]
dat_wider <- as.data.frame(tidyr::pivot_wider(dat,
id_cols = "description",
names_from = "contrast",
values_from = value,
# values_fn = length,
values_fill = 0))
row.names(dat_wider) <- dat_wider$description
dat_wider$description <- NULL
dat_wider
}
#' aggregate enriched data
#'
#' The function looks recursively inside a folder and extract and bind together enrichment data (`report.*.tsv`)
#' generated by \href{https://www.gsea-msigdb.org/gsea/index.jsp}{gsea}. If not path is specified it opens
#' an interactive `{svDialogs}` modal dialog box to chose the folder.
#'
#'
#' @param path The folder to look into, default is `svDialogs::dlg_dir()$res`
#' @return A dataframe of enriched results.
#' @export
rich_aggregate <- function(
path = svDialogs::dlg_dir()$res){
if(path == "svDialogs::dlg_dir()$res") eval(path)
files_reports <- rich_find(path = path, reg_expr = "report.*(pos|neg).*tsv")
message("Importing files ...")
rich_ls <- lapply(files_reports$file, data.table::fread, showProgress = TRUE)
names(rich_ls) <- files_reports$id
# remove empty dataframe
nrows_rich_ls <- pbapply::pblapply(rich_ls, nrow)
to_keep <- nrows_rich_ls != 0
rich_df <- dplyr::bind_rows(rich_ls[to_keep], .id = "id")
# clean names
names(rich_df) <- gsub("\\.|\\s|-", "_", tolower(names(rich_df)))
names(rich_df)[names(rich_df) == "name"] <- "description"
rich_df <- tidyr::separate(rich_df, col = "id", into = c("contrast", "gs"), sep = "~")
# remove unwanted columns
to_remove <- append(grep("gs.+", names(rich_df), value = TRUE, perl = TRUE), "v12")
rich_df[, to_remove] <- NULL
rich_df
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.