R/itemselect.R

Defines functions print.itemselect itemselect

Documented in itemselect print.itemselect

### select significantly responsive items 
itemselect <- function(omicdata, select.method = c("quadratic", "linear", "ANOVA"), FDR = 0.05, 
                       max.ties.prop = 0.2)
{
  # Checks
  if (!(inherits(omicdata, "microarraydata") | 
        inherits(omicdata, "RNAseqdata") |
        inherits(omicdata, "continuousomicdata") |
        inherits(omicdata, "continuousanchoringdata")))
    stop("Use only with 'microarraydata', 'RNAseqdata', 'continuousomicdata' or 
    'continuousanchoringdata' objects, respectively
    created with functions 'microarraydata()', 'RNAseqdata()', 'metabolomicdata()'
    or 'continuousomicdata()'
    and 'continuousanchoringdata()'.")
  select.method <- match.arg(select.method, c("quadratic", "linear", "ANOVA"))
  if (select.method == "ANOVA")
  {
    # Check that there are replicates for at least half of the doses
    nbdoses <- length(omicdata$design)
    nbdoseswithoutrep <- sum(omicdata$design == 1)
    if (nbdoseswithoutrep > nbdoses / 2) 
      stop("The ANOVA method should not be used when there are more than half of the doses 
           without replicates. Choose the quadratic or linear trend test for such a design.")
  }
  if (!is.numeric(FDR))
    stop("FDR, the false discovery rate, must a number in ]0; 1[ (generally under 0.1).")
  if ((FDR <=0) | (FDR >=1))
    stop("FDR, the false discovery rate, must in ]0; 1[.")
  if ((max.ties.prop <=0) | (max.ties.prop >0.5))
    stop("max.ties.prop, the maximal tolerated proportion of tied values per item, must in ]0; 0.5].")
  
  item <- omicdata$item
  dose <- omicdata$dose
  fdose <- as.factor(dose)
  doseranks <- as.numeric(as.factor(dose))
  doseranks2 <- doseranks*doseranks
  irow <- seq_len(length(item))
  
  if (inherits(omicdata,"microarraydata") | 
      inherits(omicdata,"continuousomicdata"))
  {
    data <- omicdata$data
    if (select.method == "quadratic")
    {
      design4lmFit <- stats::model.matrix(~ doseranks + doseranks2)
    } else
      if (select.method == "linear")
      {
        design4lmFit <- stats::model.matrix(~ doseranks)
      } else
        if (select.method == "ANOVA")
        {
          design4lmFit <- stats::model.matrix(~ fdose)
        } 
    
    # Selection using limma    
    fit <- lmFit(data, design4lmFit)
    fitBayes <- eBayes(fit)
    # all adjusted pvalues without sorting
    res <- topTable(fitBayes, adjust.method = "BH", number = nrow(data), sort.by = "none") 
    
    selectindexnonsorted <- irow[res$adj.P.Val < FDR]
    adjpvaluenonsorted <- res$adj.P.Val[selectindexnonsorted]
    irowsortedbypvalue <- order(adjpvaluenonsorted, decreasing = FALSE)
    selectindex <- selectindexnonsorted[irowsortedbypvalue]
    adjpvalue <- adjpvaluenonsorted[irowsortedbypvalue]
  } else
    
  if (inherits(omicdata,"continuousanchoringdata"))
  {
    data <- omicdata$data
    nitem <- nrow(data)
    pvalue <- numeric(length = nitem)
    # Selection using lm    
    if (select.method == "quadratic")
    {
      for (i in 1:nitem) # to write in a sapply in case there are a lot of endpoints
      {
        lmFit <- stats::lm(data[i, ] ~ doseranks + doseranks2)
        lmFitconst <- stats::lm(data[i, ] ~ 1)
        a <- stats::anova(lmFit, lmFitconst)
        pvalue[i] <- a[["Pr(>F)"]][2]
      }
    } else
    if (select.method == "linear")
    {
      for (i in 1:nitem) # to write in a sapply in case there are a lot of endpoints
      {
        lmFit <- stats::lm(data[i, ] ~ doseranks)
        lmFitconst <- stats::lm(data[i, ] ~ 1)
        a <- stats::anova(lmFit, lmFitconst)
        pvalue[i] <- a[["Pr(>F)"]][2]
      }
    } else
    if (select.method == "ANOVA")
    {
      for (i in 1:nitem) # to write in a sapply in case there are a lot of endpoints
      {
        lmFit <- stats::lm(data[i, ] ~ fdose)
        lmFitconst <- stats::lm(data[i, ] ~ 1)
        a <- stats::anova(lmFit, lmFitconst)
        pvalue[i] <- a[["Pr(>F)"]][2]
      }
    } 
      
    # all adjusted pvalues without sorting after Benjamini Hochberg procedure
    wholeadjpvalue <- stats::p.adjust(pvalue, method = "BH") 

    selectindexnonsorted <- irow[wholeadjpvalue < FDR]
    adjpvaluenonsorted <- wholeadjpvalue[selectindexnonsorted]
    irowsortedbypvalue <- order(adjpvaluenonsorted, decreasing = FALSE)
    selectindex <- selectindexnonsorted[irowsortedbypvalue]
    adjpvalue <- adjpvaluenonsorted[irowsortedbypvalue]
    
  } else
      
    
  if (inherits(omicdata,"RNAseqdata"))
  {
    data <- omicdata$raw.counts
    coldata <- data.frame(fdose = fdose, doseranks = doseranks, doseranks2 = doseranks2)
    rownames(coldata) <- colnames(data)
    
    if (select.method == "quadratic")
    {
      dds <- DESeqDataSetFromMatrix(
        countData = data,
        colData = coldata,
        design = ~ doseranks + doseranks2
      )
    } else
      if (select.method == "linear")
      {
        dds <- DESeqDataSetFromMatrix(
          countData = data,
          colData = coldata,
          design = ~ doseranks
        )
      } else
        if (select.method == "ANOVA")
        {
          dds <- DESeqDataSetFromMatrix(
            countData = data,
            colData = coldata,
            design = ~ fdose
          )
        } 
    
    # Selection using DESeq2    
    dds <- DESeq(dds, test = "LRT", reduced = ~ 1)
    res <- results(dds)
    # all adjusted pvalues without sorting in res$padj

    selectindexnonsorted <- irow[(res$padj < FDR) & !is.na(res$padj)]
    adjpvaluenonsorted <- res$padj[selectindexnonsorted]
    irowsortedbypvalue <- order(adjpvaluenonsorted, 
                                decreasing = FALSE, na.last = TRUE)
    selectindex <- selectindexnonsorted[irowsortedbypvalue]
    adjpvalue <- adjpvaluenonsorted[irowsortedbypvalue]
  }
  
  # elimination of first selected items with a proportion of tied values
  # above max.tied.values.prop (assuming tied values correspond to the min 
  # value, at which non detection are imputed
  nsample <- length(dose)
  max4nties <- nsample * max.ties.prop
  if (length(selectindex) != 0) 
  {
    check.ties <- function(index)
    {
      datai <- data[index, ]
      mini <- min(datai)
      nbtiesi <- length(which(datai == mini))
      return(nbtiesi < max4nties)
    }
    tokeep <- sapply(selectindex, check.ties)
    selectindex <- selectindex[tokeep]
    adjpvalue <- adjpvalue[tokeep]
  } else
  {
    warning(strwrap(prefix = "\n", initial = "\n", 
                    "NO ITEM WAS SELECTED."))
  }
  
  reslist <- list(adjpvalue = adjpvalue, selectindex = selectindex, 
                  omicdata = omicdata, select.method = select.method, FDR = FDR)  
  
  return(structure(reslist, class = "itemselect"))
}

print.itemselect <- function(x, nfirstitems = 20, ...)
{
  if (!inherits(x, "itemselect"))
    stop("Use only with 'itemselect' objects.")
  
  if (x$select.method == "ANOVA")
  {
    cat("Number of selected items using an ANOVA type test with an FDR of ", x$FDR, ": ", length(x$selectindex), "\n", sep = "")
  } else
    if (x$select.method == "linear")
    {
      cat("Number of selected items using a linear trend test with an FDR of ", x$FDR, ": ", length(x$selectindex), "\n", sep = "")
    } else
      if (x$select.method == "quadratic")
      {
        cat("Number of selected items using a quadratic trend test with an FDR of ", x$FDR, ": ", length(x$selectindex), "\n", sep = "")
      } 
  
  if (length(x$selectindex) > nfirstitems) 
  {
    cat("Identifiers of the first ", nfirstitems, " most responsive items:\n", sep = "")
    print(x$omicdata$item[x$selectindex[1:nfirstitems]])
  } else
  {
    cat("Identifiers of the responsive items:\n")
    print(x$omicdata$item[x$selectindex])
  }
}
aursiber/DRomics documentation built on Feb. 6, 2024, 4:28 p.m.