R/pairwiseSERE.R

Defines functions pairwiseSERE

Documented in pairwiseSERE

#' Pairwise SERE statistics
#'
#' Pairwise SERE statistics
#'
#' @param counts \code{matrix} of raw counts
#' @param versionName versionName of the project
#' @return The \code{matrix} of SERE values
#' @author Marie-Agnes Dillies and Hugo Varet

# created october 19th, 2012
# modified december 2nd, 2013 (col.names=NA in write.table())
# modified Aug 5th, 2014 (removed tabDir argument)
# modified March 9th, 2015 (added clustering on SERE values)
# modified March 26th, 2015 (clustering with Ward criterion instead of complete)
# modified May 16th, 2017 (simplification of the loop on j)
# modified August 26th, 2019 (ggplot2)

pairwiseSERE <- function(counts, versionName="."){
  sigfun_Pearson <- function(observed) {
    #calculate lambda and expected values
    laneTotals <- colSums(observed)
    total <- sum(laneTotals)
    fullObserved <- observed[rowSums(observed)>0,]
    fullLambda <- rowSums(fullObserved)/total
    fullLhat <- fullLambda > 0
    fullExpected <- outer(fullLambda, laneTotals)

    #keep values
    fullKeep <- which(fullExpected > 0)

    #calculate degrees of freedom (nrow*(ncol -1) >> number of parameters - calculated (just lamda is calculated >> thats why minus 1)
    #calculate pearson and deviance for all values
    oeFull <- (fullObserved[fullKeep] - fullExpected[fullKeep])^2/ fullExpected[fullKeep] # pearson chisq test
    dfFull <- length(fullKeep) - sum(fullLhat!=0)

    sqrt(sum(oeFull)/dfFull)
  }

  sere <- matrix(0, ncol=ncol(counts), nrow=ncol(counts))
  for (i in 1:(ncol(counts)-1)){
    for (j in (i+1):ncol(counts)){
      sere[i,j] <- sere[j,i] <- sigfun_Pearson(counts[,c(i,j)])
    }
  }
  colnames(sere) <- rownames(sere) <- colnames(counts)
  sere <- round(sere, digits=3)

  pdf(file=paste0("figures/", versionName, "-clusterSERE.pdf"))
    hc <- hclust(as.dist(sere), method="ward.D")
    print(ggdendrogram(hc, theme_dendro=FALSE) +
            xlab("Samples") +
            ylab("SERE") +
            ggtitle(paste0(versionName, " - Cluster dendrogram on SERE values, Ward Criterion")) +
            theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5),
                  axis.text.y=element_text(angle=0)) +
            scale_y_continuous(expand=expansion(mult = c(0.01, 0.05))))
  dev.off()
  
  write.table(as.data.frame(sere), file=paste0("tables/", versionName,".SERE.xls"),
              sep="\t", row.names=TRUE, col.names=NA, dec=".", quote=FALSE)

  return(sere)
}
biomics-pasteur-fr/RNADiff documentation built on Aug. 27, 2020, 12:44 a.m.