# Preprocessing of raw microarray data (Affymetrix, Illumina, and so on).
# Only the Affymetrix methods are available for now.
#' @title Preprocess using oligo package
#'
#' @description Preprocess Affymetrix CEL files using the oligo package;
#' parallel computing is supported.
#'
#' @details The oligo package is a widely used Bioconductor package for preprocessing
#' chips produced by Affymetrix and NimbleGen, working with the files provided by these
#' manufacturers in their native format. Here it is used to load the raw data files and
#' preprocess the expression data with the RMA algorithm.
#'
#' @param data.dir a character. Directory containing the raw data; the working directory is the default.
#' @param Ncores an integer. Number of CPU cores to register for computation.
#' @param useMaxCores a logical value. If TRUE (the default), \code{Ncores} is overwritten with the
#' number of cores detected on the machine. Set it to FALSE to use the value given in \code{Ncores}.
#' @param Isgzipped a logical value. Specify if the CEL files are gzipped; TRUE is the default.
#' @param useFullNames a logical value. If TRUE, the directory path is prepended to the file names to give a relative file path. If FALSE, the file names (rather than paths) are returned.
#' @param modifyNames a logical value. Raw data downloaded from the NCBI GEO database are named
#' "GSMXXXXX.CEL.gz" if gzipped or "GSMXXXXX.CEL" if unzipped. If TRUE, sample names of that form
#' are shortened to the leading "GSMXXXXX" accession.
#' @return an ExpressionSet object
#' @author Shixiang Wang <w_shixiang@163.com>
#' @seealso \code{\link[Biobase]{ExpressionSet}}, \code{\link[oligo]{rma}}
#' @import foreach
#' @import doMC
#' @importFrom oligo read.celfiles
#' @importFrom oligo rma
#' @importFrom oligoClasses list.celfiles
#' @export
affyPreprocess.oligo <- function(data.dir=getwd(), Ncores=1, useMaxCores=TRUE,
                                 Isgzipped=TRUE, useFullNames=TRUE, modifyNames=TRUE){
  if (isTRUE(useMaxCores)) {
    # detectCores() belongs to 'parallel', which this file never imports, so
    # qualify the call instead of relying on the package being attached.
    Ncores <- parallel::detectCores()
    # detectCores() can return NA when the core count cannot be determined.
    if (is.na(Ncores)) {
      Ncores <- 1L
    }
  }
  registerDoMC(cores = Ncores)

  celFiles <- list.celfiles(data.dir, listGzipped = Isgzipped, full.names = useFullNames)
  # Fail early with a clear message rather than handing an empty file list
  # to the CEL reader.
  if (length(celFiles) == 0) {
    stop("No CEL files found in '", data.dir, "'.", call. = FALSE)
  }
  rawData <- read.celfiles(celFiles)
  rmaRes <- oligo::rma(rawData)

  if (isTRUE(modifyNames)) {
    # Keep only the leading GSM accession; the expected suffix depends on
    # whether the files were gzipped.
    namePattern <- if (isTRUE(Isgzipped)) {
      "(^GSM[0-9]+).+CEL\\.gz$"
    } else {
      "(^GSM[0-9]+).+CEL$"
    }
    # NOTE(review): sampleNames()/`sampleNames<-` are not imported anywhere
    # in this file -- confirm the package namespace provides them.
    sampleNames(rmaRes) <- sub(namePattern, "\\1", sampleNames(rmaRes))
  }
  return(rmaRes)
}
#' @title Preprocess using affy package
#'
#' @description Preprocess Affymetrix CEL files using the affy package
#'
#' @details The affy package is a widely used Bioconductor package for preprocessing
#' chips produced by Affymetrix, working with the files provided by the
#' manufacturer in their native format. Here it is used to load the raw data files and
#' preprocess the expression data with either the RMA or the gcRMA algorithm.
#'
#' @param data.dir a character. Directory containing the raw data; the working directory is the default.
#' @param Isgzipped a logical value. Specify if the CEL files are gzipped; TRUE is the default.
#' @param useFullNames a logical value. If TRUE, the directory path is prepended to the file names to give a relative file path. If FALSE, the file names (rather than paths) are returned.
#' @param modifyNames a logical value. Raw data downloaded from the NCBI GEO database are named
#' "GSMXXXXX.CEL.gz" if gzipped or "GSMXXXXX.CEL" if unzipped. If TRUE, sample names of that form
#' are shortened to the leading "GSMXXXXX" accession.
#' @param method a character. "RMA" (the default) or "gcRMA".
#' @param ... additional arguments; currently ignored.
#' @return an ExpressionSet object
#' @author Shixiang Wang <w_shixiang@163.com>
#' @seealso \code{\link[Biobase]{ExpressionSet}},\code{\link[oligo]{rma}}, \code{\link[affy]{rma}}, \code{\link[gcrma]{gcrma}}, \code{\link[iProfile]{affyPreprocess.oligo}}
#' @importFrom oligoClasses list.celfiles
#' @import affy
#' @importFrom gcrma gcrma
#' @export
affyPreprocess.affy <- function(data.dir=getwd(), Isgzipped=TRUE,
                                useFullNames=TRUE, modifyNames=TRUE,
                                method=c("RMA", "gcRMA"),...){
  # match.arg() already rejects anything other than "RMA"/"gcRMA", so no
  # further validation of 'method' is required.
  method <- match.arg(method)

  celFiles <- list.celfiles(data.dir, listGzipped = Isgzipped, full.names = useFullNames)
  # Fail early with a clear message rather than handing an empty file list
  # to the CEL reader.
  if (length(celFiles) == 0) {
    stop("No CEL files found in '", data.dir, "'.", call. = FALSE)
  }
  rawData <- affy::ReadAffy(filenames = celFiles)

  # Qualify rma() explicitly: this file imports rma from oligo as well as
  # everything from affy, and the affy implementation is the one wanted here.
  Res <- switch(method,
    RMA = affy::rma(rawData),
    gcRMA = gcrma(rawData)
  )

  if (isTRUE(modifyNames)) {
    # Keep only the leading GSM accession; the expected suffix depends on
    # whether the files were gzipped.
    namePattern <- if (isTRUE(Isgzipped)) {
      "(^GSM[0-9]+).+CEL\\.gz$"
    } else {
      "(^GSM[0-9]+).+CEL$"
    }
    # NOTE(review): sampleNames()/`sampleNames<-` are not imported explicitly
    # in this file -- confirm the package namespace provides them.
    sampleNames(Res) <- sub(namePattern, "\\1", sampleNames(Res))
  }
  return(Res)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.