R/anovaLike.R

Defines functions anovaLike

Documented in anovaLike

#' @title A function allowing the identification of differentially expressed genes.
#' @description This function executes edgeR in a docker container for the identification of differentially expressed genes in single-cell RNAseq
#' @param group, a character string. Two options: sudo or docker, depending on which group the user belongs to
#' @param file, a character string indicating the counts table file with the path of the file. 
#' @param cluster.file, a character string indicating the _clustering.output.txt file of interest, generated by bootstrapSimlar or bootStrapTsne. IMPORTANT: this file must be located in the same folder where the counts table is placed
#' @param ref.cluster, a number indicating the cluster to be used as reference for the anova-like comparison with the other clusters.
#' @param sep, separator used in count file, e.g. '\\t', ','
#' @param logFC.threshold, minimal logFC present in at least one of the comparisons with respect to reference covariate
#' @param FDR.threshold, minimal FDR present in at least one of the comparisons with respect to reference covariate
#' @param logCPM.threshold, minimal average abundance
#' @param plot, boolean, TRUE a plot of differentially expressed genes is generated
#' @author Raffaele Calogero, raffaele.calogero [at] unito [dot] it, University of Torino, Italy
#' @return  Three tab delimited files with prefix DE\_, filtered\_DE\_, logFC\_filtered\_DE\_ followed by the counts table name. The counts table, reordered on the basis of cluster positions, has the extension \_reordered.txt
#'
#' @examples
#' \dontrun{
#'     #running anovaLike
#' system("wget http://130.192.119.59/public/annotated_setPace_10000_noC5_clustering.output.txt")
#' system("wget http://130.192.119.59/public/annotated_setPace_10000_noC5.txt.zip")
#' unzip("annotated_setPace_10000_noC5.txt.zip")
#' anovaLike(group="docker", file=paste(getwd(),"annotated_setPace_10000_noC5.txt",sep="/"),
#'        sep="\t", cluster.file="annotated_setPace_10000_noC5_clustering.output.txt", ref.cluster=3,
#'        logFC.threshold=1, FDR.threshold=0.05, logCPM.threshold=4, plot=TRUE)
#'
#' }
#'
#' @export


anovaLike <- function(group=c("sudo","docker"), file, sep, cluster.file, ref.cluster, logFC.threshold=1, FDR.threshold, logCPM.threshold=4, plot=c(TRUE, FALSE)){

  # Reorders the counts table so that the cells of the reference cluster come
  # first, followed by the other clusters in increasing cluster number, hands
  # the reordered table to deDetection() and finally relabels the logFC
  # columns of the filtered result with the cluster they refer to (C<n>).
  # Called for its side effects (files written); returns the value of the
  # last write.table() call.

  # Split the counts file name into folder, stem and extension.
  data.folder <- dirname(file)
  name.parts <- strsplit(basename(file), "\\.")[[1]]
  positions <- length(name.parts)
  counts.table <- paste(name.parts[seq(1, positions - 1)], collapse="")
  file.type <- name.parts[positions]

  # Name used for the reordered table and passed on to deDetection().
  # NOTE(review): counts.table no longer contains a literal "." at this point,
  # so this substitution normally leaves the name unchanged; the naming is kept
  # as it was because it must agree with what deDetection() reads and writes -
  # confirm against deDetection before changing it.
  reordered.name <- sub(".txt", "_reordered.txt", counts.table)

  # The cluster file is documented to live next to the counts table: if it is
  # not reachable as given, look for it in the counts table folder.
  if (!file.exists(cluster.file) && file.exists(file.path(data.folder, cluster.file))) {
    cluster.file <- file.path(data.folder, cluster.file)
  }

  counts <- read.table(file, sep=sep, header=TRUE, row.names=1, stringsAsFactors=FALSE)
  # "_" is reserved below as the separator between cell name and cluster id.
  names(counts) <- gsub("_", "-", names(counts))
  clusters <- read.table(cluster.file, sep=sep, header=TRUE, row.names=1, stringsAsFactors=FALSE)
  rownames(clusters) <- gsub("_", "-", rownames(clusters))

  if (is.null(clusters$Belonging_Cluster)) {
    stop("cluster.file does not contain a Belonging_Cluster column", call.=FALSE)
  }
  if (ncol(counts) != nrow(clusters)) {
    stop("the counts table has ", ncol(counts), " cells but cluster.file describes ",
         nrow(clusters), call.=FALSE)
  }

  # Bring cells and cluster assignments into the same order.
  # drop=FALSE keeps a data.frame even when a single column/row is left.
  if (!identical(names(counts), rownames(clusters))) {
    clusters <- clusters[order(rownames(clusters)), , drop=FALSE]
    counts <- counts[, order(names(counts)), drop=FALSE]
    if (!identical(names(counts), rownames(clusters))) {
      warning("cell names in the counts table and in cluster.file differ even after sorting; ",
              "cluster labels may be assigned to the wrong cells", call.=FALSE)
    }
  }

  # Extracts the numeric cluster id from names of the form <cell>_<cluster>.
  clusterOf <- function(cell.names) {
    as.numeric(vapply(strsplit(cell.names, "_"), function(x) x[2], character(1)))
  }

  names(counts) <- paste(names(counts), clusters$Belonging_Cluster, sep="_")
  ref.idx <- grep(paste("_", ref.cluster, "$", sep=""), names(counts))
  if (length(ref.idx) == 0) {
    stop("ref.cluster ", ref.cluster, " is not present in cluster.file", call.=FALSE)
  }
  ref <- counts[, ref.idx, drop=FALSE]
  others <- counts[, setdiff(seq_len(ncol(counts)), ref.idx), drop=FALSE]
  if (ncol(others) == 0) {
    stop("all cells belong to ref.cluster ", ref.cluster, ": nothing to compare", call.=FALSE)
  }
  others <- others[, order(clusterOf(names(others))), drop=FALSE]
  counts <- data.frame(ref, others, check.names=FALSE)
  write.table(counts, reordered.name, sep=sep, col.names=NA)

  deDetection(group=group, data.folder=data.folder, counts.table=reordered.name,
              file.type=file.type, logFC.threshold=logFC.threshold, FDR.threshold=FDR.threshold,
              logCPM.threshold=logCPM.threshold, plot=plot)

  de.full <- read.table(paste("filtered_DE_", reordered.name, sep=""), sep="\t", header=TRUE, row.names=1, stringsAsFactors=FALSE)
  # One logFC column per non-reference cluster, in the order used above.
  others.nu <- unique(clusterOf(names(others)))
  others.nu <- paste(rep("C", length(others.nu)), others.nu, sep="")
  # drop=FALSE: with a single comparison the logFC table must stay a
  # data.frame, otherwise gene names and the column label are lost.
  de <- de.full[, seq_along(others.nu), drop=FALSE]
  names(de) <- others.nu
  names(de.full) <- c(others.nu, c("logCPM", "F", "PValue", "FDR"))
  write.table(de, paste("logFC_filtered_DE_", reordered.name, ".", file.type, sep=""), sep=sep, col.names=NA)
  write.table(de.full, paste("filtered_DE_", reordered.name, ".", file.type, sep=""), sep=sep, col.names=NA)

}
kendomaniac/CASC documentation built on Oct. 4, 2023, 11:10 a.m.