R/make_PAC.R

Defines functions make_PAC

Documented in make_PAC

#' Make PAC
#'
#' \code{make_PAC} Compiles, orders and checks the assumptions of a PAC object.
#'
#' Given pheno, anno and counts data.frames, the function will safely
#' generate a simple PAC object.
#'
#' @family PAC generation
#'
#' @seealso \url{https://github.com/Danis102} for updates on the current
#'   package.
#'
#' @param pheno Data.frame with sample names as row names containing metadata
#'   about each sample. Can be generated by the \code{\link{make_pheno}}
#'   function.
#'
#' @param anno Data.frame with unique sequences as row names containing metadata
#'   about each sequence. If anno=NULL, a simple Anno data.frame will
#'   automatically be generated from the sequence names in counts.
#'   Annotations for each sequence can be extended using for example the reanno
#'   workflow (see vignette).
#'
#' @param counts Data.frame representing a counts table, with column names as
#'   sample names and sequence names as row names. Contains the counts for each
#'   sequence across the samples. Can be generated by the
#'   \code{\link{make_counts}}.
#'   
#' @param output Character indicating output format. If output="S4" (default),
#'   then the PAC object is returned as an S4 object. If output="S3", the
#'   PAC object will be returned as a list of data.frames.
#'   
#'
#' @return Ordered PAC object (S4 by default, or an S3 list of data.frames if
#'   output="S3"), checked for compatibility with downstream analysis in seqpac.
#'   
#' @examples
#'
#' ### First make counts 
#' 
#' # Seqpac includes strongly down-sampled smallRNA fastq.
#' sys_path = system.file("extdata", package = "seqpac", mustWork = TRUE)
#' input <- list.files(path = sys_path, pattern = "fastq", all.files = FALSE,
#'                 full.names = TRUE)
#'
#' # Notice that make_counts will generate another temp folder, that will 
#' # be emptied on finalization. By setting save_temp=TRUE you may save the 
#' # content.  
#'  
#' counts  <- make_counts(input, threads=2, parse="default_neb",
#'                        trimming="seqpac", plot=TRUE,
#'                        evidence=c(experiment=2, sample=1))
#'
#' colnames(counts$counts)
#' 
#' ### Then generate a phenotype table with make_pheno
#'
#' #  Note:  'Sample_ID' column needs to be similar IDs as 
#' #          colnames in the counts table. You may also 
#' #          specify a path to a txt file.
#'
#' Sample_ID <- colnames(counts$counts)
#'
#' pheno <- data.frame(Sample_ID=Sample_ID,
#'                        Treatment=c(rep("heat", times=1), 
#'                                    rep("control", times=2)),
#'                        Batch=rep(c("1", "2", "3"), times=1)) 
#' 
#' pheno <- make_pheno(pheno=pheno, progress_report=counts$progress_report, 
#'                      counts=counts$counts)
#'
#'
#' pheno 
#'
#' # Note that progress report from make_counts is added if you specify it  
#'      
#' ### Lastly combine into PAC
#' 
#' pac <- make_PAC(pheno=pheno, counts=counts$counts)
#'
#'
#' pac
#' names(pac)
#' 
#' # Note: a simple annotation table is added automatically.
#' head(anno(pac))
#'
#' # Clean up temp
#'closeAllConnections()
#'fls_temp  <- list.files(tempdir(), recursive=TRUE, full.names = TRUE)
#'file.remove(fls_temp, showWarnings=FALSE)
#'
#' @export

make_PAC <- function(counts, pheno=NULL, anno=NULL, output="S4"){
  # Assemble a PAC object from a counts table plus optional pheno/anno tables.
  # Pheno rows are ordered like the columns of counts and Anno rows are
  # ordered like the rows of counts.
  #
  # counts: data.frame (sequences as rows, samples as columns), or a list
  #         holding that data.frame in its "counts" element.
  # pheno:  data.frame with sample names as row names; when NULL, a table
  #         with a single Sample_ID column is built from colnames(counts).
  # anno:   data.frame with sequences as row names; when NULL, a table with a
  #         single Size column (nchar of each row name of counts) is built.
  # output: "S4" wraps the tables with PAC(); "S3" tags the list with class
  #         c("list", "PAC_S3") and runs PAC_check(); any other value returns
  #         the ordered, unclassed list.
  #
  # Stops when an input is not a data.frame, when dimensions disagree, or
  # when the row names of anno/pheno do not cover the names found in counts.

  # Accept a list with a "counts" element in place of the bare table
  if(methods::is(counts, "list")){
    counts <- counts[["counts"]]
  }

  # Fall back to minimal annotation / phenotype tables when none are given
  if(is.null(anno)){
    anno <- data.frame(Size=nchar(rownames(counts)))
    rownames(anno) <- rownames(counts)
  }
  if(is.null(pheno)){
    pheno <- data.frame(Sample_ID=colnames(counts))
    rownames(pheno) <- colnames(counts)
  }

  # Basic structural assumptions
  stopifnot(all(vapply(list(pheno, anno, counts), is.data.frame, logical(1))))
  stopifnot(nrow(anno) == nrow(counts))
  stopifnot(nrow(pheno) == ncol(counts))

  # For every name in counts, look up the row holding it in anno/pheno.
  # The lookup must go in this direction (counts names -> table rows);
  # matching the other way round yields the inverse permutation and
  # misorders the tables.
  anno_idx <- match(rownames(counts), rownames(anno))
  pheno_idx <- match(colnames(counts), rownames(pheno))
  if(anyNA(anno_idx)){
    stop("Row names in anno must match the row names in counts.",
         call.=FALSE)
  }
  if(anyNA(pheno_idx)){
    stop("Row names in pheno must match the column names in counts.",
         call.=FALSE)
  }

  pac <- list(Pheno=pheno[pheno_idx, , drop=FALSE],
              Anno=anno[anno_idx, , drop=FALSE],
              Counts=counts)

  if(output=="S3"){
    class(pac) <- c("list", "PAC_S3")
    stopifnot(PAC_check(pac))
  }

  if(output=="S4"){
    pac <- PAC(Pheno=pac$Pheno,
               Anno=pac$Anno,
               Counts=pac$Counts,
               norm=list(NULL),
               summary=list(NULL))
  }
  pac
}
Danis102/seqpac documentation built on Aug. 26, 2023, 10:15 a.m.