# File loading functionality
#' Load data sets from pre-specified sources
#' @description This function loads all the necessary data sets (metabolomics and 16S only) from
#' pre-specified sources. The difference between `taxa` and `timetaxa` is that `timetaxa` has specific
#' taxa based on time while `taxa` collects data jointly analzyed between the 6W and 12M time point. All
#' directories should be in a \code{data_directory.csv} file.
#' @param type Type of data required to load, can be of class "metabo", "taxa", "reference" or "all" or "timetaxa"
#' @param file Directory towards the \code{data_directory.csv} file.
#' @export
load_data <- function(file, type){
# error handling
if(dir.exists("//afs/northstar/users") == F){
stop("Cannot load files, either AFS is not connected or the files do not exist and have changed")
}
dir <- utils::read.csv(file, row.names = 1) # data directory file
dir$directory <- as.character(dir$directory)
if (type == "reference" | type == "all"){
message("loading reference data...")
mblreference <- utils::read.csv(file = dir["mblreference",])
}
if (type == "metabo" | type == "all"){
message("loading metabolic data...")
metabo <- readRDS(file = dir["metabo",])
metabo.key <- readRDS(file = dir["metabokey",])
}
if (type == "timetaxa" | type == "all"){
message("Loading time point specific taxa data...")
tax.6W <- readRDS(dir["tax6W",])
tax.12M <- readRDS(dir["tax12M",])
tax.6W.key <- readRDS(dir["tax6Wkey",])
tax.12M.key <- readRDS(dir["tax12Mkey",])
}
if (type == "taxa" | type == "all"){
message("loading joint processed data ...")
tax <- readRDS(dir["taxall",])
tax.key <- readRDS(dir["taxallkey",])
}
# object export
message("Exporting to File...")
if (type == "all"){
result <- list(metabo, metabo.key, tax.6W, tax.12M, tax.6W.key, tax.12M.key, mblreference, tax, tax.key)
names(result) <- c('metabo', 'metabo.key',
'tax.6W', 'tax.12M', 'tax.6W.key', 'tax.12M.key',
'mblreference',
"full.tax", "full.tax.key")
} else if (type == "metabo"){
result <- list(metabo, metabo.key)
names(result) <- c('metabo','metabo.key')
} else if (type == "taxa"){
result <- list(tax.6W, tax.12M, tax.6W.key, tax.12M.key)
names(result) <- c('tax.6W', 'tax.12M', 'tax.6W.key', 'tax.12M.key')
} else if (type == "reference"){
result <- list(mblreference)
names(result) <- c('mblreference')
}
return(result)
}
#' @title Main function to load all data
#' @description Stringing all data loading functions together to load matching metabolomics and 16S data sets.
#' For now, the function doesn't have matching between 6W and 12M samples.
#' @param file Directory containing the file \code{data_directory.csv}.
#' @export
load_main <- function(file){
data <- load_data(file = file, type = "all") #load all possible data
# separate by time
metabo.6W <- data$metabo[data$metabo$TimePeriod == '6W',]
metabo.12M <- data$metabo[data$metabo$TimePeriod == '12M',]
# impute and remove missing labels
message("For 12M samples...")
metabo.12M <- missing.imputation(reference = data$mblreference, data = metabo.12M, timelabel = "12M")
extraction.index <- extraction(metabo.12M, tax = data$full.tax)
metabo.12M <- extraction.index[[1]]
tax.12M <- extraction.index[[2]]
message("For 6W samples...")
metabo.6W <- missing.imputation(reference = data$mblreference, data = metabo.6W, timelabel = "6W")
extraction.index <- extraction(metabo.6W, tax = data$full.tax)
metabo.6W <- extraction.index[[1]]
tax.6W <- extraction.index[[2]]
# Post processing
message("Post processing...")
# assigning similar row names and removing the first 6 unecessary columns
rownames(metabo.12M) <- metabo.12M$mblid
metabo.12M <- metabo.12M[,-c(1:6)]
rownames(metabo.6W) <- metabo.6W$mblid
metabo.6W <- metabo.6W[,-c(1:6)]
# dealing with p-histidine which is a pain
colnames(metabo.12M)[ncol(metabo.12M)] <- "p-Methylhistidine"
colnames(metabo.6W)[ncol(metabo.6W)] <- "p-Methylhistidine"
# returning all data frame in proper matrix form
names <- rownames(metabo.12M)
metabo.12M <- apply(metabo.12M,MARGIN = 2, as.numeric)
rownames(metabo.12M) <- names
names <- rownames(metabo.6W)
metabo.6W <- apply(metabo.6W, MARGIN = 2, as.numeric)
rownames(metabo.6W) <- names
rm(names)
# removing the standard column
metabo.12M <- metabo.12M[,-7]
metabo.6W <- metabo.6W[,-7]
#transforming the taxonomic matrix back to matrix form
tax.12M <- as.matrix(tax.12M)
tax.6W <- as.matrix(tax.6W)
result <- list(metabo.6W, metabo.12M, tax.6W, tax.12M, data$metabo.key, data$full.tax.key)
names(result) <- c("metabo.6W", "metabo.12M", "tax.6W", "tax.12M", "metabo.key", "tax.key")
return(result)
}
#' @title Checking for phylogenetic tree and constructing tree
#' @description Check whether the tree file exists in a directory. If not, construct tree from
#' 16S sequences stored in the joint analysis file (Available for taxa-all options only). The FastTree
#' program will be run externally to generate trees accordingly.
#' @param file Directory containing the \code{data_directory.csv} file.
#' @param progfile Directory containing the \code{FastTree} executable.
#' @examples
#' # do not run
#' # tree.check(file = "./data_directory.csv", progfile = "../software/FastTree")
#' @export
tree.check <- function(file, progfile){
dir <- utils::read.csv(file, row.names = 1)
dir$directory <- as.character(dir$directory)
if (!file.exists("./tree.tre")){
message("No tree exists")
seqs <- readRDS(dir["treeall",])
alignment <- DECIPHER::AlignSeqs(Biostrings::DNAStringSet(seqs), anchor = NA, verbose = TRUE)
phagAlign <- phangorn::phyDat(methods::as(alignment, "matrix"), type = "DNA")
phangorn::write.phyDat(phagAlign, file = "alignment.fasta", format = "fasta")
system(paste(progfile,"-gtr -nt alignment.fasta > tree.tre"))
}
tree <- ape::read.tree(file = "tree.tre")
return(tree)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.