R/filterData.R

Defines functions filt.by.Signal filterBySignal filterByVar createFilteringReportFile filteringReport filterData

Documented in filterData

################################################################
sdf <- function (x, threshold) if (sd(x) < threshold) return(FALSE) else return(TRUE)
#################################################################
filt.by.Signal <- function(x, grups, threshold) {
  if(max(sapply(split(x, grups), mean), na.rm = TRUE) < threshold) return(FALSE) else return(TRUE)
}

#################################################################
filterBySignal <- function(expres, groups, threshold, sigFun.Name = "filt.by.signal", thr.as.perc = TRUE) {
  if(thr.as.perc) {
    signalThr <- quantile(as.vector(expres), threshold / 100)
  } else {
    signalThr <- threshold
  }

  sigFun <- eval(parse(text = sigFun.Name))
  filtered.vals <- apply (expres, 1, sigFun, groups, signalThr)
  expres.filtered <- expres[filtered.vals, ]

  return(expres.filtered)
}
##################################################################
filterByVar <- function(expres, threshold, varFun.Name = "sdf", thr.as.perc = TRUE, thr.fun = sd) {
  if(thr.as.perc) {
    SD <- apply(expres, 1, thr.fun)
    variabilityThr <- quantile (SD,  threshold / 100) # SD3Q
  } else {
    variabilityThr <- threshold
  }

  varFun <- eval(parse(text = varFun.Name))
  filtered.vals <- apply (expres, 1, varFun, variabilityThr)
  expres.filtered <- expres[filtered.vals, ]

  return(expres.filtered)
}
##################################################################
createFilteringReportFile <- function(filteringReportFName, outputDir) {
  write(paste(rep("-", 70), collapse = ""), file = file.path(outputDir, filteringReportFName), append = FALSE)

  linia <- paste("Type Of Filtering", "Threshold", "Genes Number", "Number Of Samples", sep = "\t")
  write(linia, file = file.path(outputDir, filteringReportFName), append = TRUE)

  write(paste(rep("-", 70), collapse = ""), file = file.path(outputDir, filteringReportFName), append = TRUE)
}
##################################################################
filteringReport <- function(n.exprs = c(NA, NA),
                            n.exprs.filtered.Affy = c(NA, NA),
                            n.exprs.filtered.BySignal,
                            signalThr,
                            n.exprs.filtered.ByVar = c(NA, NA),
                            varThr,
                            outputDir,
                            filteringReportFName = "FilteringReport.txt") {
  if(!file.exists(file.path(outputDir, filteringReportFName))) createFilteringReportFile(filteringReportFName, outputDir)

  linia <- paste("Non-filtering", "---", n.exprs[1], n.exprs[2], sep = "\t")
  write(linia, file = file.path(outputDir, filteringReportFName), append = TRUE)

  linia <- paste("By Affymetrix Controls", "---", n.exprs.filtered.Affy[1], n.exprs.filtered.Affy[2], sep = "\t")
  write(linia, file = file.path(outputDir, filteringReportFName), append = TRUE)

  linia <- paste("By Signal", signalThr, n.exprs.filtered.BySignal[1], n.exprs.filtered.BySignal[2], sep = "\t")
  write(linia, file = file.path(outputDir, filteringReportFName), append = TRUE)

  linia <- paste("By Variability", varThr, n.exprs.filtered.ByVar[1], n.exprs.filtered.ByVar[2], sep = "\t")
  write(linia, file = file.path(outputDir, filteringReportFName), append = TRUE)

  write(paste(rep("-", 35), collapse = " "), file = file.path(outputDir, filteringReportFName), append = TRUE)

}
#################################################################

#' filterData
#'
#' Function that filter data and create a report file.
#'
#' @param expres Data set to be filtered.
#' @param controls Vector that indicates wich observations are controls.
#' @param removeNAs If TRUE the Na's observations will be deleted.
#' @param entrezs Name of the file for Entrez genes.
#' @param bySignal If TRUE the data is filtered by signal value.
#' @param signalThr Threshold for filter the data.
#' @param grups Name of the different groups of the data.
#' @param sigFun.Name ????????
#' @param sigThr.as.perc ???????????
#' @param byVar If TRUE the data is filtered by ?????
#' @param variabilityThr Threshold for filter the data.
#' @param varFun.Name ????????
#' @param varThr.as.perc Threshold variability as percentage
#' @param pairingFun.Name Name of the function to be imported to pair data
#' @param targets Name of the targets file.
#' @param doReport If TRUE a report is created.
#' @param outputDir Path of the file created.
#' @param filteringReportFName Name of the report file.
#' @return Matrix object with te data filtered
#' @examples
#' \dontrun{
#' load("./ResultsDir/normalizedData.Rda")
#' repes <- duplicated(exprs(my.norm), MARGIN = 1)
#' exprs(my.norm) <- exprs(my.norm)[!repes, ]
#' eset_norm <- my.norm
#' load("./ResultsDir/controls.Rda")
#' removeNAs <- TRUE
#' load("./ResultsDir/Entrezs.Rda")
#' entrezs <- entrezTable
#' SignalFilter <- TRUE
#' signalThreshold <- 50
#' signalFilter.Function <- "filt.by.Signal"
#' signalThreshold.as.percentage <- TRUE
#' VarFilter <- TRUE
#' variabilityThreshold <- 50
#' variability.Function <- "sdf"
#' variabilityThreshold.as.percentage <- TRUE
#' pairing.Function <- NULL
#' my.targets <-read.AnnotatedDataFrame("./celfiles/targets.txt", header = TRUE, row.names = 1)
#' doReport <- TRUE
#' outputDir <- "./ResultsDir"
#' FilteringReportFileName <- "FilteringReport.txt"
#'
#' exprs.filtered <- filterData(expres = exprs(eset_norm) ,controls = names(controlsTable),
#'                              removeNAs = TRUE, entrezs = entrezs, bySignal = SignalFilter,
#'                              signalThr = signalThreshold, grups = pData(eset_norm)$grupo,
#'                              sigFun.Name = signalFilter.Function,
#'                              sigThr.as.perc = signalThreshold.as.percentage, byVar = VarFilter,
#'                              variabilityThr = variabilityThreshold,
#'                              varFun.Name = variability.Function,
#'                              varThr.as.perc = variabilityThreshold.as.percentage,
#'                              pairingFun.Name = pairing.Function,
#'                              targets = my.targets, doReport = doReport, outputDir = outputDir,
#'                              filteringReportFName = FilteringReportFileName)
#'}
#' @export

filterData <- function(expres,
                       controls = NULL,
                       removeNAs = FALSE,
                       entrezs = NULL,
                       bySignal = TRUE,
                       signalThr,
                       grups = NULL,
                       sigFun.Name = "filt.by.signal",
                       sigThr.as.perc = TRUE,
                       byVar = TRUE,
                       variabilityThr,
                       varFun.Name = "sdf",
                       varThr.as.perc = TRUE,
                       pairingFun.Name = NULL,
                       targets,
                       doReport = NULL,                 # Per  defecte executara la crida a la funcio que fa el report
                       outputDir = NULL,                # Directori a on desa el report del filtrat
                       filteringReportFName = NULL) {   # Nom per defecte del report
  ### Matrix object is needed
  if(is.data.frame(expres)) expres <- as.matrix(expres)

  ### Remove controls and NAs
  if(!is.null(controls)) {
    expresNotControls <- setdiff(rownames(expres), controls)

    if(removeNAs && (!is.null(entrezs))) {
      entrezsNotNAs <- names(entrezs[!is.na(entrezs)])                    # List that contains notNAs of entrezs
      notControlsNorNAs <- intersect(expresNotControls, entrezsNotNAs)    #IDs of the expresion probes
      exprs.filtered.0 <- expres[notControlsNorNAs, ]                     # Filter data
    } else {                                                              # If there is no Entrez or we don't want to remove NAs
      exprs.filtered.0 <- expres[notControls, ]                           # Expresion without controls
    }
  } else {                                                                # If there is no controls
    if(removeNAs &&(!is.null(entrezs))) {
      entrezsNotNAs <- names(entrezs[!is.na(entrezs)])
      exprs.filtered.0 <- expres[entrezsNotNAs, ]
    } else {
      exprs.filtered.0 <- expres                                          # No filtered by NA or controls
    }
  }

  ### Filtratge per senyal
  if(bySignal && (!is.null(grups))) {
    exprs.filtered.1 <- filterBySignal(exprs.filtered.0, grups, threshold = signalThr,
                                       sigFun.Name = sigFun.Name, thr.as.perc = sigThr.as.perc)
  } else {
    exprs.filtered.1 <- exprs.filtered.0
  }

  ### Aparellament. MOLTES vegades aquest pas no es fa. ?????????????
  ### EL control de si es fa o no el basem en la funcio d'aparellament que es passa com parametre
  ### Si es deixa a NULL el pas s'omet
  if(!is.null(pairingFun.Name)){
    exprs.filtered.2 <- do.call(pairingFun.Name, list(exprs.filtered.1, targets))
  } else {
    exprs.filtered.2 <- exprs.filtered.1
  }

  ### Filtratge per variabilitat
  if(byVar) {
    exprs.filtered.3 <- filterByVar(exprs.filtered.2, threshold = variabilityThr,
                                    varFun.Name = varFun.Name, thr.as.perc = varThr.as.perc)
  } else {
    exprs.filtered.3 <- exprs.filtered.2
  }

  ### Report del proces de filtratge
  if(!is.null(doReport)) {
    if(doReport) {
      if((!is.null(filteringReportFName)) && (!is.null(outputDir))) {
        filteringReport(n.exprs = dim(expres),
                        n.exprs.filtered.Affy = dim(exprs.filtered.0),
                        n.exprs.filtered.BySignal = dim(exprs.filtered.1), signalThr = signalThr,
                        n.exprs.filtered.ByVar = dim(exprs.filtered.3), varThr = variabilityThr,
                        outputDir = outputDir,
                        filteringReportFName = filteringReportFName)
      }
    }
  }

  return(exprs.filtered.3)
}
uebvhir/BasicP4microArrays documentation built on Nov. 5, 2019, 11:03 a.m.