# R/getData.R

# Defines functions getData

# Documented in getData

#' getData
#'
#' Retrieve and parse sample names, and retrieve metabolite data. Returns a
#' list holding the sample design, the data, and the two merged together.
#' @details Convenience function for parsing sample names and returning a
#'   dataset. When the object has a \code{phenoData} slot, that slot is used as
#'   the design as-is; \code{delim} and \code{cmpdlabel} are not used in that
#'   case. Otherwise the row names of the selected dataset are split on
#'   \code{delim} to build the design, and the dataset columns are labelled
#'   from the \code{cmpdlabel} slot. Factor names are read from the rows of
#'   \code{ramclustObj$ExpDes$design} starting at the row named
#'   \code{"fact1name"}; where those are unavailable, generic names
#'   (\code{fact1}, \code{fact2}, ...) are used. Sample names with fewer
#'   fields than the longest sample name are padded with \code{NA}.
#' @param ramclustObj ramclustR object to retrieve data from
#' @param which.data character; which dataset (SpecAbund or SpecAbundAve) to reference
#' @param delim character; "-" by default - the delimiter for parsing sample
#'   names to factors. Passed to \code{strsplit()} and therefore interpreted
#'   as a regular expression.
#' @param cmpdlabel character; "cmpd" by default - name of the slot used to
#'   label the data columns. Can also be set to 'ann' for column names
#'   assigned as annotations.
#' @param filter logical; FALSE by default. If TRUE, checks for the $cmpd.use
#'   slot generated by the rc.cmpd.cv.filter() function, and only gets
#'   acceptable compounds. If the slot is absent, all compounds are returned;
#'   if its length does not match the number of columns, a warning is issued
#'   and all compounds are returned.
#' @return returns a list of length 3: $design is the experimental sample
#'   factors (phenoData, or sample names parsed by the delim), $data is the
#'   dataset, $full.data is the merged $design and $data data.frames.
#' @concept RAMClustR
#' @author Corey Broeckling
#' @export
getData <- function(ramclustObj = RC,
                    which.data = "SpecAbund",
                    delim = "-",
                    cmpdlabel = "cmpd",
                    filter = FALSE) {
  # `[[` is used for slot access throughout: `$` on a list partially matches
  # names, which can silently pick up the wrong slot.
  abund <- ramclustObj[[which.data]]
  if (is.null(abund) || length(dim(abund)) != 2L) {
    stop("ramclustObj slot '", which.data,
         "' is missing or is not a two-dimensional dataset.", call. = FALSE)
  }
  n.cmpd <- ncol(abund)

  # Resolve which compound columns to return. Derived from the dataset itself
  # rather than from another slot (e.g. $ann), which may be absent.
  keep <- seq_len(n.cmpd)
  if (filter) {
    use.flag <- ramclustObj[["cmpd.use"]]
    if (!is.null(use.flag)) {
      if (length(use.flag) == n.cmpd) {
        keep <- which(use.flag)
      } else {
        warning("ramclustObj slot cmpd.use has length ", length(use.flag),
                " while the ", which.data, " dataset has ", n.cmpd,
                " columns; filter not applied.", call. = FALSE)
      }
    }
  }

  pheno <- ramclustObj[["phenoData"]]
  if (!is.null(pheno)) {
    if (nrow(pheno) != nrow(abund)) {
      stop('number of rows for phenoData and ', which.data, ' are not the same.', '\n')
    }
    # drop = FALSE keeps a single retained compound as a one-column table.
    abund <- abund[, keep, drop = FALSE]
    return(list(
      "design" = pheno,
      "data" = abund,
      "full.data" = data.frame(pheno, abund)
    ))
  }

  # No phenoData: label the columns, then derive the design from sample names.
  labels <- ramclustObj[[cmpdlabel]]
  if (length(labels) != n.cmpd) {
    stop(paste("ramclustObj slot", cmpdlabel, "has length", length(labels), "while the", which.data, "dataset has", n.cmpd, "columns", '\n'))
  }
  abund <- abund[, keep, drop = FALSE]
  # colnames<- works for both matrices and data frames; names<- on a matrix
  # would set the element-wise names attribute instead of the column names.
  colnames(abund) <- as.character(labels[keep])

  samp <- rownames(abund)
  if (length(samp) == 0L) {
    stop("the ", which.data,
         " dataset has no row (sample) names to parse.", call. = FALSE)
  }

  # Split every sample name once, then pad short names with NA so that all
  # rows have the same number of fields.
  parts <- strsplit(samp, delim)
  maxfact <- max(lengths(parts))
  padded <- lapply(parts, function(p) {
    c(p, rep(NA_character_, maxfact - length(p)))
  })
  des <- as.data.frame(do.call(rbind, padded), stringsAsFactors = FALSE)

  # Factor names: rows of ExpDes$design from "fact1name" onward, with generic
  # names wherever the experimental design does not supply one.
  factnames <- paste0("fact", seq_len(maxfact))
  design <- ramclustObj[["ExpDes"]][["design"]]
  first <- which(row.names(design) == "fact1name")
  if (length(first) == 1L) {
    idx <- seq(first, length.out = maxfact)
    avail <- idx <= nrow(design)
    factnames[avail] <- as.character(design[idx[avail], 1])
  }

  names(des) <- factnames
  row.names(des) <- samp

  list("design" = des, "data" = abund, "full.data" = cbind(des, abund))
}
# cbroeckl/csu.pmf.tools documentation built on Jan. 26, 2024, 6:27 p.m.