#' @title Aggregating signature counts from MFF files from one or more studies
#' (folders).
#'
#' @description Aggregate signature counts data using archaic_prepare() from
#' multiple study directories into a single matrix.
#'
#' @param dat may be a list of count data matrices generated by
#' archaic_prepare() (samples in rows, mismatch signatures in named columns)
#' or a character vector of folder names containing MFF files. For every
#' folder, a cached file named \code{<folder>/<basename of folder>.rda} is
#' loaded when it exists; otherwise archaic_prepare() is run on the folder.
#'
#' @return The function creates a matrix of counts with combined samples from
#' multiple studies. The number of rows of this matrix is the same as the
#' total number of study samples across all the studies considered (rows are
#' stacked in the order the studies are supplied), with columns representing
#' the union of the mismatch signatures and the matrix cells recording the
#' counts of the signatures occurring in the samples. Signatures that do not
#' occur in a study are recorded as 0 for the samples of that study.
#'
#' @keywords archaic_pool
#' @export
archaic_pool <- function(dat) {
  if (is.character(dat)) {
    count_list <- .archaic_pool_read_folders(dat)
  } else if (is.list(dat) && !is.data.frame(dat)) {
    message("The data is read as a list of matrices - processed by ",
            "archaic_prepare()")
    count_list <- dat
  } else {
    stop("'dat' must be a list of count matrices or a character vector of ",
         "folder names", call. = FALSE)
  }
  .archaic_pool_combine(count_list)
}

# Internal helper: obtain one count matrix per folder, either from the cached
# .rda file inside the folder or by running archaic_prepare() on the folder.
.archaic_pool_read_folders <- function(folders) {
  if (length(folders) == 0L) {
    stop("No folders supplied: 'dat' is empty", call. = FALSE)
  }
  message("Checking if the folders exist")
  # Drop trailing path separators so that basename()/file.path() behave the
  # same whether or not the caller wrote "path/to/study/".
  clean <- sub("[/\\\\]+$", "", folders)
  blank <- !is.na(clean) & !nzchar(clean)
  clean[blank] <- folders[blank]
  absent <- folders[!file.exists(clean)]
  if (length(absent) > 0L) {
    stop("The folder ", absent[1], " in the folder list does not exist: ",
         "aborting", call. = FALSE)
  }
  message("The data is read as names of folders")
  lapply(seq_along(folders), function(i) {
    cache <- file.path(clean[i], paste0(basename(clean[i]), ".rda"))
    if (!file.exists(cache)) {
      message(".rda file not found in folder ", folders[i],
              ": running archaic_prepare on the MFF files in the folder")
      return(archaic_prepare(folders[i]))
    }
    # Load into a scratch environment so the objects stored in the file can
    # never overwrite the local variables of this function.
    scratch <- new.env(parent = emptyenv())
    loaded <- load(cache, envir = scratch)
    if (length(loaded) == 0L) {
      stop("The file ", cache, " does not contain any object", call. = FALSE)
    }
    message("Successfully read .rda file from the folder ", folders[i])
    get(loaded[1], envir = scratch, inherits = FALSE)
  })
}

# Internal helper: stack the count matrices row-wise over the union of their
# signature (column) names, filling signatures absent from a study with 0.
.archaic_pool_combine <- function(count_list) {
  if (length(count_list) == 0L) {
    stop("No count matrices to pool: 'dat' is empty", call. = FALSE)
  }
  mats <- lapply(count_list, as.matrix)
  unnamed <- vapply(mats, function(m) is.null(colnames(m)), logical(1))
  if (any(unnamed)) {
    stop("Every count matrix must have column (signature) names",
         call. = FALSE)
  }
  sig_names <- Reduce(union, lapply(mats, colnames))
  n_rows <- vapply(mats, nrow, integer(1))
  row_offset <- cumsum(n_rows) - n_rows
  pooled_data <- matrix(0, nrow = sum(n_rows), ncol = length(sig_names),
                        dimnames = list(NULL, sig_names))
  for (k in seq_along(mats)) {
    if (n_rows[k] == 0L) {
      next
    }
    # Rows are addressed by position, not by name: sample names may repeat
    # across studies and name matching would overwrite the earlier study.
    rows <- row_offset[k] + seq_len(n_rows[k])
    cols <- match(colnames(mats[[k]]), sig_names)
    pooled_data[rows, cols] <- mats[[k]]
  }
  # Sample names are attached only when every non-empty matrix provides them.
  row_named <- vapply(mats, function(m) {
    nrow(m) == 0L || !is.null(rownames(m))
  }, logical(1))
  if (all(row_named)) {
    rownames(pooled_data) <- unlist(lapply(mats, rownames), use.names = FALSE)
  }
  pooled_data
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.