R/noiseq.R

Defines functions addData readData PCA.GENES .calcFactorWeighted .calcFactorQuantile .calcNormFactors make.ASCA.design filtered.data CV mypretty DE.plot dat countsbio.plot biodetection.plot biodetection.dat logscaling plot.y2 ranking sim.samples int.mult ASCAfun.triple ASCAfun2 ASCAfun1 ASCA.3f ASCA.2f ASCA.1f ARSyNseq ARSyNmodel ARSyNcomponents allMDbio

################################################################################

# classes.R

setClass("Biodetection",representation(dat="list"))
setClass("CD",representation(dat="list"))
setClass("CountsBio",representation(dat="list"))
setClass("GCbias",representation(dat="list"))
setClass("lengthbias",representation(dat="list"))
setClass("Saturation",representation(dat="list"))
setClass("PCA",representation(dat="list"))

setGeneric("explo.plot",function(object,...) standardGeneric("explo.plot"))

setMethod("explo.plot","Biodetection",
    function(object,samples=c(1,2),plottype=c("persample","comparison"),
        toplot="protein_coding",...)
        biodetection.plot(object@dat,samples=samples,plottype=plottype,
            toplot=toplot,...))

setMethod("explo.plot","CD",function(object,samples=NULL,...) 
    cd.plot(object@dat,samples=samples,...))

setMethod("explo.plot","CountsBio",
    function(object,samples=c(1,2),toplot="global",
        plottype=c("barplot","boxplot"),...)
        countsbio.plot(object@dat,samples,toplot,plottype,...))

#setMethod("explo.plot","GCbias",
#    function(object,samples=NULL,toplot="global",...)
#    GC.plot(object@dat,samples=samples,toplot=toplot,...))

#setMethod("explo.plot","lengthbias",
#    function(object,samples=NULL,toplot="global",...)
#    length.plot(object@dat,samples=samples,toplot=toplot,...))

#setMethod("explo.plot","Saturation",
#    function(object,samples=NULL,toplot=1,yleftlim=NULL,yrightlim=NULL,...)
#    saturation.plot(object@dat,samples=samples,toplot=toplot,yleftlim=yleftlim,
#        yrightlim=yrightlim,...))

#setMethod("explo.plot","PCA",
#    function(object,samples=c(1,2),plottype="scores",factor=NULL)
#    PCA.plot(object@dat,samples=samples,plottype=plottype,factor=factor))


# Show methods for exploration objects
setMethod("show","Biodetection",function(object) {
    cat("\n Reference genome: \n==========\n")
    names(dimnames(object@dat$genome))=NULL
    print(object@dat$genome)
    for (i in seq_len(length(object@dat$biotables))) {
        cat("\n",names(object@dat$biotables)[i],"\n==========\n")
        print(object@dat$biotables[[i]])
    }
})

setMethod("show","CD",function(object) {
    cat("\n Confidence intervals for median of M to compare each sample ",
        "to reference:\n=======\n")
    print(object@dat$DiagnosticTest)
    cat("\n Reference sample is:\n=======\n")
    print(object@dat$refColumn)
})

setMethod("show","CountsBio",function(object) {
    cat("\n Summary: \n============\n")              
    print(object@dat$summary[[1]])            
})

setMethod("show","GCbias",function(object) {
    x <- object@dat$RegressionModels
    for (i in seq_len(length(x))) {
        print(names(x)[i])
        print(summary(x[[i]]))
    }
})

setMethod("show","lengthbias",function(object) {
    x <- object@dat$RegressionModels
    for (i in seq_len(length(x))) {
        print(names(x)[i])
        print(summary(x[[i]]))
    }
})

setMethod("show","Saturation",function(object) {
    x <- dat2save(object)
    cat("\n Number of detected features at each sequencing ",
        "depth: \n============\n")
    for (i in seq_len(length(x))) {
        print(names(x)[i])
        print(x[[i]])
    }            
})

setMethod("show","PCA",function(object) {
    x <- object$result$var.exp
    x <- round(x*100,4)
    colnames(x) <- c("%Var","Cum %Var")
    rownames(x) <- paste("PC",seq_len(nrow(x)))
    cat("\n Percentage of total variance explained by each component: ",
        "\n============\n")
    print(x)
})


# Coercion methods for exploration objects
setGeneric("dat2save",function(object) standardGeneric("dat2save"))
setMethod("dat2save","Biodetection",function(object) object@dat)
setMethod("dat2save","CD",function(object) object@dat)
setMethod("dat2save","CountsBio",function(object) object@dat$summary)
setMethod("dat2save","GCbias",function(object) object@dat$data2plot)
setMethod("dat2save","lengthbias",function(object) object@dat$data2plot)
setMethod("dat2save","Saturation",function(object) {
    muestras=vector("list",length=length(object@dat$depth))
    names(muestras)=names(object@dat$depth)
    for (i in seq_len(length(muestras))) {
        muestras[[i]] <- object@dat$depth[[i]]
        for (j in seq_len(length(object@dat$saturation))) {
            muestras[[i]] <- cbind(muestras[[i]],
                object@dat$saturation[[j]][[i]])
        }
        colnames(muestras[[i]]) <- c("depth",names(object@dat$saturation))
    }

    muestras
})

setMethod("dat2save","PCA",function(object) object@dat$result)

setClass("myInfo",representation(method="character",k="numeric",
    lc="numeric",factor="vector",v="numeric",nss="numeric",pnr="numeric",
    comparison="vector",replicates="character"))
setClass("Output",representation(results="list"),contains="myInfo")

setValidity("Output",function(object) {
    if (!(is.character(object@method))) {
        return(paste("Method must be a string"))
    } else if (!(is.numeric(object@k))) {
        return(paste("k must be numeric"))
    } else if (!(is.numeric(object@lc))) {
        return(paste("lc must be numeric"))
    } else if (!(is.vector(object@factor))) {
        return(paste("Factor must be a vector of strings"))
    } else if (!(is.numeric(object@v))) {
        return(paste("v must be numeric"))
    } else if (!(is.numeric(object@nss))) {
        return(paste("nss must be numeric"))
    } else if (!(is.numeric(object@pnr))) {
        return(paste("pnr must be numeric"))
    } else if (!(is.vector(object@comparison))) {
        return(paste("Comparison must be a vector of strings"))
    } else if (!(is.list(object@results))) {
        return(paste("Results must be a list of data.frames"))
    } else {
        return(TRUE)
    }
})

Output <- function (data,method,k,lc,factor,v,nss,pnr,comparison,replicates) {
    new("Output",results=data,method=method,k=k,lc=lc,factor=factor,v=v,nss=nss,
        pnr=pnr,comparison=comparison,replicates=replicates)
}

setMethod("show","Output",function(object) {
    if (object@method == "n")
        object@method="none"
    for (i in seq_len(length(object@results))) {
        cat("\nSummary",i,"\n=========\n")
        cat("\nYou are comparing",object@comparison[i],"from",object@factor[i],
            "\n\n")
        print(head(object@results[[i]][order(object@results[[i]][,5],
            decreasing=TRUE),]))
    }
    cat("\nNormalization\n")
    cat("\tmethod:",object@method,"\n")
    cat("\tk:",object@k,"\n")
    cat("\tlc:",object@lc,"\n")

    # Simulated samples
    if (object@replicates == "no") {
        cat("\nYou are working with simulated replicates:\n")
        cat("\tpnr:",object@pnr,"\n")
        cat("\tnss:",object@nss,"\n")
        cat("\tv:",object@v,"\n")
    }
    # With biological or technical replicates
    else {
        cat("\nYou are working with",object@replicates,"replicates\n")
    }           
})

################################################################################

################################################################################

# allMD.R

allMD <- function (input,factor,conditions,k=0.5,replicates,norm="rpkm",pnr=0.2,
    nss=5,v=0.02,lc=0) {
    # Check if the factor introduced is already defined
    # If the factor introduced is defined and has more than 2 conditions,it will
    # check if the conditions specified are defined too
    condition_fac <- FALSE
    condition_lev <- FALSE

    datos1 <- datos2 <- matrix()

    for (i in colnames(pData(input))) {
        if (factor == i) {
            condition_fac <- TRUE
            if (!is.factor(pData(input)[,i])) 
                pData(input)[,i]=as.factor(pData(input)[,i])
            if (length(levels(pData(input)[,i])) == 2) {
                if (!is.null(assayData(input)$exprs)) {
                    datos1 <- 
                        assayData(input)$exprs[,which(
                            pData(input)[,i]==levels(pData(input)[,i])[1]),
                            drop=FALSE]
                    datos2 <- 
                        assayData(input)$exprs[,which(
                            pData(input)[,i] ==levels(pData(input)[,i])[2]),
                            drop=FALSE]
                } 
                else {
                    datos1 <- 
                        assayData(input)$counts[,which(
                            pData(input)[,i]==levels(pData(input)[,i])[1]),
                            drop=FALSE]
                    datos2 <- assayData(input)$counts[,which(
                        pData(input)[,i]==levels(pData(input)[,i])[2]),
                            drop=FALSE]
                }

                # Define the comparison string
                comparison <- paste(levels(pData(input)[,i])[1],
                    levels(pData(input)[,i])[2],sep=" - ")

                condition_lev <- TRUE
            }

            else {
                if (is.null(conditions))
                    stop("Error. You must specify which conditions you wish ",
                        "to compare when the factor has two or more ",
                        "conditions.\n")
                if (length(conditions) != 2)
                    stop("Error. The argument conditions must contain the 2 ",
                        "conditions you wish to compare.")
            
                l <- conditions %in% pData(input)[,i]
                # If they are defined,they will be TRUE
                if (l[1] == TRUE && l[2] == TRUE) {
                    if (!is.null(assayData(input)$exprs)) {
                        datos1 <- assayData(input)$exprs[,which(
                            pData(input)[,i] == conditions[1]),drop=FALSE]
                        datos2 <- assayData(input)$exprs[,which(
                            pData(input)[,i] == conditions[2]),drop=FALSE]
                    }
                    else {
                        datos1 <- assayData(input)$counts[,which(
                            pData(input)[,i] == conditions[1]),drop=FALSE]
                        datos2 <- assayData(input)$counts[,which(
                            pData(input)[,i] == conditions[2]),drop=FALSE]
                    }
                    # Define the comparison string
                    comparison <- paste(conditions[1],conditions[2],sep=" - ")
                    condition_lev <- TRUE
                }
            }
        }
    }

    if (!condition_fac)
        stop("The factor you have written does not correspond with any of the ",
            "ones you have defined.")
            
    if (!condition_lev)
        stop("The conditions you have written don't exist in the factor ",
            "specified.\n")

    # Correction to make it work when there are simulated samples
    if (replicates == "no")
        replicates <- "technical"

    n1 <- ncol(as.matrix(datos1))
    n2 <- ncol(as.matrix(datos2))

    if (norm == "n") { # no normalization
        datos1 <- round(datos1,100)
        datos2 <- round(datos2,100)
    }

    if (is.null(k)) {
        m1 <- min(datos1[noceros(datos1,num=FALSE)],na.rm=TRUE)
        m2 <- min(datos2[noceros(datos2,num=FALSE)],na.rm=TRUE)
        mm <- min(m1,m2)
        k <- mm/2    
    } 

    # Total counts for each gene:
    suma1 <- rowSums(as.matrix(datos1))
    suma2 <- rowSums(as.matrix(datos2))

    # All genes
    todos <- rownames(as.matrix(datos1))

    # Genes with counts in any condition
    concounts <- names(which(suma1+suma2 > 0))

    long <- 1000
    g.sinL <- NULL

    if (!is.null(featureData(input)@data$Length)) {
        g.sinL <- names(which(is.na(featureData(input)@data$Length)))
        if (any(!is.na(featureData(input)@data$Length)) == TRUE) 
        long <- featureData(input)@data[concounts,"Length"]
    }

    if (replicates == "technical") { ### technical replicates
        suma1 <- suma1[concounts]
        suma2 <- suma2[concounts]    

        #----------------------------------------------------------------------#
        # Normalization of counts for each condition (aggregating replicates)

        if (norm == "rpkm") {      # RPKM
            suma1.norm <- noirpkm(suma1,long=long,k=k,lc=lc)
            suma2.norm <- noirpkm(suma2,long=long,k=k,lc=lc)
        }

        if (norm == "uqua") {
            suma.norm <- noiuqua(cbind(suma1,suma2),long=long,lc=lc,k=k)
            suma1.norm <- as.matrix(suma.norm[ ,1])
            suma2.norm <- as.matrix(suma.norm[ ,2])
        }

        if (norm == "tmm") {
            suma.norm <- noitmm(as.matrix(cbind(suma1,suma2)),long=long,lc=lc,
                k=k)
            suma1.norm <- as.matrix(suma.norm[ ,1])
            suma2.norm <- as.matrix(suma.norm[ ,2])
        }
    }

    #-------------------------------------------------------------------------#

    ## Noise distribution

    if ((n1+n2)>2) {   # with real samples
        datitos <- cbind(datos1,datos2)
        datitos <- datitos[concounts,]

        gens.sin0 <- setdiff(concounts,g.sinL)

        if (norm == "n") {       # no normalization
            datitos.0 <- sinceros(datitos,k=k)
            datitos.norm <- datitos.0[gens.sin0,]
        }

        if (norm == "rpkm") {      # RPKM
            datitos.0 <- noirpkm(datitos,long=long,k=k,lc=lc)
            datitos.norm <- datitos.0[gens.sin0,]
        }

        if (norm == "uqua") {      # Upper Quartile
            datitos.0 <- noiuqua(datitos,long=long,lc=lc,k=k)
            datitos.norm <- datitos.0[gens.sin0,]
        }

        if (norm == "tmm") {
            datitos.0 <- noitmm(datitos,long=long,lc=lc,k=k)
            datitos.norm <- datitos.0[gens.sin0,]
        }

        datos1.norm <- datitos.norm[ ,seq_len(n1)]
        datos2.norm <- datitos.norm[ ,(n1+1):(n1+n2)]

        if (n1 > 1) {
            MD1 <- MD(dat=datos1.norm)
        } 
        else { 
            MD1 <- NULL 
        }

        if (n2 > 1) {
            MD2 <- MD(dat=datos2.norm)
        } 
        else { 
            MD2 <- NULL 
        }
    } 
    else { # with simulated samples
        if (nss == 0) {
            nss <- 5
        }
        
        datos.sim <- sim.samples(counts1=sinceros(suma1,k=k),
            counts2=sinceros(suma2,k=k),pnr=pnr,nss=nss,v=v)

        nn <- vapply(datos.sim,ncol,numeric(1))
        dat.sim.norm <- vector("list",length=2)
        datitos <- cbind(datos.sim[[1]],datos.sim[[2]])
        rownames(datitos)=names(suma1)

        sumita <- rowSums(datitos)
        g.sin0 <- names(which(sumita > 0))
        gens.sin0 <- setdiff(g.sin0,g.sinL)

        if (norm == "n") {       # no normalization
            datitos.0 <- sinceros(datitos,k=k)
            datitos.norm <- datitos.0[gens.sin0,]
        }

        if (norm == "rpkm") {      # RPKM
            datitos.0 <- noirpkm(datitos,long=long,k=k,lc=lc)
            datitos.norm <- datitos.0[gens.sin0,]
        }

        if (norm == "uqua") {      # Upper Quartile
            datitos.0 <- noiuqua(datitos,long=long,lc=lc,k=k)
            datitos.norm <- datitos.0[gens.sin0,]
        }

        if (norm == "tmm") {
            datitos.0 <- noitmm(datitos,long=long,lc=lc,k=k)
            datitos.norm <- datitos.0[gens.sin0,]
        }

        dat.sim.norm[[1]] <- datitos.norm[,seq_len(nn[1])]
        dat.sim.norm[[2]] <- datitos.norm[,(nn[1]+1):sum(nn)]

        MD1 <- MD(dat=dat.sim.norm[[1]])
        MD2 <- MD(dat=dat.sim.norm[[2]])
    }

    Mr <- c(as.numeric(MD1$M),as.numeric(MD2$M))
    Dr <- c(as.numeric(MD1$D),as.numeric(MD2$D))

    #-------------------------------------------------------------------------#

    ## M and D for different experimental conditions

    if (replicates == "technical" & norm != "n") {
        MDs <- MD(dat=cbind(suma1.norm,suma2.norm))

        lev1 <- suma1.norm[,1]
        lev1 <- lev1[todos]
        
        lev2 <- suma2.norm[,1]
        lev2 <- lev2[todos]
    } 
    else {
        if ((n1+n1) == 2) {
            datos1.norm <- sinceros(as.matrix(datos1)[concounts,],k=k)
            datos2.norm <- sinceros(as.matrix(datos2)[concounts,],k=k)      
        }  

        resum1.norm <- rowMeans(as.matrix(datos1.norm))
        resum2.norm <- rowMeans(as.matrix(datos2.norm))

        lev1 <- resum1.norm[todos]
        lev2 <- resum2.norm[todos]

        MDs <- MD(dat=cbind(resum1.norm,resum2.norm))
    }

    ## Completing M and D
    names(lev1) <- names(lev2) <- todos

    Ms <- as.numeric(MDs$M)
    names(Ms) <- rownames(MDs$M)
    Ms <- Ms[todos]
    names(Ms) <- todos

    Ds <- as.numeric(MDs$D)
    names(Ds) <- rownames(MDs$D)
    Ds <- Ds[todos]
    names(Ds) <- todos

    ## Results
    list("k"=k,"comp"=comparison,"Level1"=lev1,"Level2"=lev2,"Ms"=Ms,"Ds"=Ds,
        "Mn"=Mr,"Dn"=Dr)
}

allMDbio <- function(input,factor,conditions,k=0.5,norm="rpkm",lc=1,r=10,
    a0per=0.9,nclust=15,filter=1,depth=NULL,cv.cutoff=0,cpm=1) {

    # Check if the factor introduced is already defined
    # If the factor introduced is defined and has more than 2 conditions,
    # it will check if the conditions specified are defined too
    condition_fac <- FALSE
    condition_lev <- FALSE

    datos1 <- datos2 <- matrix()

    for (i in colnames(pData(input))) {
        if (factor == i) {
            condition_fac <- TRUE

            if (!is.factor(pData(input)[,i])) 
                pData(input)[,i] <- as.factor(pData(input)[,i])

            if (length(levels(pData(input)[,i])) == 2) {
                if (!is.null(assayData(input)$exprs)) {
                    datos1 <- assayData(input)$exprs[,which(
                        pData(input)[,i]==levels(pData(input)[,i])[1])]
                    datos2 <- assayData(input)$exprs[,which(
                        pData(input)[,i] ==levels(pData(input)[,i])[2])]
                } 
                else {
                    datos1 <- assayData(input)$counts[,which(
                        pData(input)[,i] ==levels(pData(input)[,i])[1])]
                    datos2 <- assayData(input)$counts[,which(
                        pData(input)[,i] ==levels(pData(input)[,i])[2])]
                }

            # Define the comparison string
            comparison <- paste(levels(pData(input)[,i])[1],
                levels(pData(input)[,i])[2],sep=" - ")

            condition_lev <- TRUE

            if (!((ncol(datos1) > 1) && (ncol(datos2) > 1)))
                stop("Error. NOISeqBIO needs at least 2 biological replicates ",
                    "per condition.\n")
            }
            else {
                if (is.null(conditions))
                    stop("Error. You must specify which conditions you wish ",
                        "to compare when the factor has two or more ",
                        "conditions.\n")
                if (length(conditions) != 2)
                    stop("Error. The argument conditions must contain the 2 ",
                        "conditions you wish to compare.")
            
                l <- conditions %in% pData(input)[,i]
                # If they are defined,they will be TRUE
                if (l[1] == TRUE && l[2] == TRUE) {
                    if (!is.null(assayData(input)$exprs)) {
                        datos1 <- assayData(input)$exprs[,which(
                            pData(input)[,i] == conditions[1])]
                        datos2 <- assayData(input)$exprs[,which(
                            pData(input)[,i] == conditions[2])]
                    }
                    else {
                        datos1 <- assayData(input)$counts[,which(
                            pData(input)[,i] == conditions[1])]
                        datos2 <- assayData(input)$counts[,which(
                            pData(input)[,i] == conditions[2])]
                    }
                    
                    # Define the comparison string
                    comparison <- paste(conditions[1],conditions[2],sep=" - ")
                    condition_lev <- TRUE
                }
            }
        }
    }

    if (condition_fac == FALSE)
        stop("The factor specified does not correspond with any of the ",
            "ones you have defined.") 
    if (condition_lev == FALSE)
        stop("The conditions specified don't exist for the factor specified.\n")

    #--------------------------------------------------------------------------#

    # Number of observations within each condition 
    n1 <- ncol(as.matrix(datos1))
    n2 <- ncol(as.matrix(datos2))
    if (max(n1,n2) == 1) 
        stop("There is only one replicate per condition. Please,use ",
            "NOISeq instead of NOISeqBIO.\n")

    # Rounding off data
    if (norm == "n") {      # no normalization
        datos1 <- round(datos1,10)
        datos2 <- round(datos2,10)
    }

    # Computing k
    if (is.null(k)) {
        m1 <- min(datos1[noceros(datos1,num=FALSE)],na.rm=TRUE)
        m2 <- min(datos2[noceros(datos2,num=FALSE)],na.rm=TRUE)
        k <- min(m1,m2)/2          
    }

    # Total counts for each gene:
    suma1 <- rowSums(as.matrix(datos1))
    suma2 <- rowSums(as.matrix(datos2))

    # Genes with counts in any condition
    concounts <- names(which(suma1+suma2 > 0))

    # All genes
    todos <- rownames(as.matrix(datos1))

    # Gene length
    long <- 1000
    g.sinL <- NULL  # genes with no length defined

    if (!is.null(featureData(input)@data$Length)) {
        g.sinL <- names(which(is.na(featureData(input)@data$Length)))
        if (any(!is.na(featureData(input)@data$Length)) == TRUE) 
            long <- featureData(input)@data[concounts,"Length"]
    }

    # Genes with counts and with length
    gens.sin0 <- setdiff(concounts,g.sinL)

    # cond1 and cond2 in the same matrix
    datitos <- cbind(datos1,datos2)  
    datitos <- datitos[concounts,]  # selecting only genes with counts

    # Sequencing depth when filtering method=3
    if (filter == 3 && is.null(depth)) 
        depth <- colSums(datitos)

    #-------------------------------------------------------------------------#
    #-------------------------------------------------------------------------#

    ## Normalization  
    if (norm == "n") {       # no normalization
        datitos.0 <- sinceros(datitos,k=k)
        datitos.norm <- datitos.0[gens.sin0,]
    }

    if (norm == "rpkm") {      # RPKM
        datitos.0 <- noirpkm(datitos,long=long,k=k,lc=lc)
        datitos.norm <- datitos.0[gens.sin0,]
    }

    if (norm == "uqua") {      # Upper Quartile
        datitos.0 <- noiuqua(datitos,long=long,lc=lc,k=k)
        datitos.norm <- datitos.0[gens.sin0,] 
    }

    if (norm == "tmm") {
        datitos.0 <- noitmm(datitos,long=long,lc=lc,k=k)
        datitos.norm <- datitos.0[gens.sin0,]       
    }

    #-------------------------------------------------------------------------#

    ## Filtering out low count features
    if (filter != 0) {
        datos.filt <- filtered.data(dataset=datitos.norm,
            factor=c(rep("cond1",n1),rep("cond2",n2)),norm=TRUE,depth=depth,
            method=filter,cv.cutoff=cv.cutoff,cpm=cpm)
    } 
    else {
        datos.filt <- datitos.norm
    }

    datos1.filt <- datos.filt[,seq_len(n1)]
    datos2.filt <- datos.filt[,(n1+1):(n1+n2)]   

    #-------------------------------------------------------------------------#

    ## Noise distribution

    Zr <- NULL

    if (n1+n2 <= 9) {  # sharing information within clusters
        Zr=share.info(mydata=datos.filt,n1=n1,n2=n2,r=r,nclust=nclust)
    } 
    else {   # r permutations
        for (i in seq_len(r)) {
            print(paste("r =",i))
            mipermu <- sample(seq_len(n1+n2))
            mipermu <- datos.filt[,mipermu]
            mean1 <- rowMeans(mipermu[,seq_len(n1)])
            mean2 <- rowMeans(mipermu[,(n1+1):(n1+n2)])
            sd1 <- apply(mipermu[,seq_len(n1)],1,sd)
            sd2 <- apply(mipermu[,(n1+1):(n1+n2)],1,sd)
            
            myparam <- list("n"=c(n1,n2),"sd"=cbind(sd1,sd2))
            MDperm <- MDbio(dat=cbind(mean1,mean2),param=myparam,a0per=a0per)
            Zr <- cbind(Zr,myDfunction(mydif=MDperm$D,myrat=MDperm$M,stat=1,
                coef=0.5))
        }
    }

    #-------------------------------------------------------------------------#

    ## Z-score for different experimental conditions (SIGNAL)

    mean1 <- rowMeans(as.matrix(datos1.filt))
    mean2 <- rowMeans(as.matrix(datos2.filt))        

    sd1 <- apply(as.matrix(datos1.filt),1,sd)
    sd2 <- apply(as.matrix(datos2.filt),1,sd)

    myparam=list("n"=c(n1,n2),"sd"=cbind(sd1,sd2))

    MDs <- MDbio(dat=cbind(mean1,mean2),param=myparam,a0per=a0per)

    Zs <- myDfunction(mydif=MDs$D,myrat=MDs$M,stat=1,coef=0.5)

    #-------------------------------------------------------------------------#

    ## Completing M and D (in signal)

    lev1 <- mean1[todos]
    lev2 <- mean2[todos]
    names(lev1) <- names(lev2) <- todos

    Zs <- as.numeric(Zs)
    names(Zs) <- rownames(MDs$M)

    Zs <- Zs[todos]
    names(Zs) <- todos

    ## Computing Zn
    Zn <- as.numeric(Zr)

    #-------------------------------------------------------------------------#

    ## Results
    list("k"=k,"comp"=comparison,"Level1"=lev1,"Level2"=lev2,"Zs"=Zs,"Zn"=Zn)
}

## Function to summarize difference and ratio information (D and D0)
myDfunction <- function (mydif,myrat,stat,coef) {
    if (stat == 1) { # linear combination of difference and ratio
        myDvalues=coef*mydif + (1-coef)*myrat
    }
    if (stat == 2) { # distance to origin from (ratio,difference)
        myDvalues=sign(mydif) * sqrt((mydif)^2 + (myrat)^2)  
    }
    myDvalues
}

################################################################################

################################################################################

# ARSyNcomponents.R

# This program selects the number of components that explain more than the 
# Variability% If Variability="average" the number of components will be those 
# that explain more than the average variation of the principal components
# For residuals model the number of components selected are 
# beta*average-variability.

ARSyNcomponents <- function(asca=asca,Variability=0.75,beta=2) {
    MODEL<-asca[-length(asca)]
    M<-length(MODEL)-1
    output<-NULL

    if (Variability=="average") {
        for (i in seq_len(M)) { 
            lim <- 1/rankMatrix(MODEL[[i]]$X)[1]    
            t <- table(MODEL[[i]]$var.exp[,1]>lim)
            if (length(t)==1) {
                t[2]=0
            }
            t <- t[2]
            names(t)<-names(MODEL)[i]
            output<-c(output,t)
        }
    }

    if (Variability!="average") {
        lim<-Variability
        for (i in seq_len(M)) { 
            t <- which(MODEL[[i]]$var.exp[,2]>lim)[1]
            names(t) <- names(MODEL)[i]
            output <- c(output,t)
        }
    }
    
    ### Residuals model
    
    i <- M+1
    lim <- beta*1/rankMatrix(MODEL[[i]]$X)[1]   
    t <- table(MODEL[[i]]$var.exp[,1]>lim)
    if (length(t)==1) {
        t[2]=0
    }
    t <- t[2]
    names(t)<-names(MODEL)[i]
    output <- c(output,t)

    output
}

################################################################################

################################################################################

# ARSyNmodel.R

ARSyNmodel <- function(X=X,Factors=2,Designa=Designa,Designb=Designb,
    Designc=Designc,Variability="average",Join=TRUE,Interaction=TRUE,beta=2) {

    if (Factors==1) {
        Fac0 <- c(1,2)
        names(Fac0) <- c("Model.a","Model.res")
        asca0 <- ASCA.1f(X=X,Designa=Designa,Fac=Fac0)
        Fac <- ARSyNcomponents(asca0,Variability=Variability,beta=beta)
        for (i in seq_len(length(Fac))) {
            Fac0[names(Fac[i])]<-Fac[names(Fac[i])]
        }
        asca <- ASCA.1f(X=X,Designa=Designa,Fac=Fac0)
    }



    if (Factors==2) {
        Fac0 <- c(1,2,2,2)
        names(Fac0) <- c("Model.a","Model.b","Model.ab","Model.res")
        if (Join[1]) {
            names(Fac0)[3] <- c("Model.bab")
        }
        asca0 <- ASCA.2f(X=X,Designa=Designa,Designb=Designb,Fac=Fac0,Join=Join,
            Interaction=Interaction)
        Fac <- ARSyNcomponents(asca0,Variability=Variability,beta=beta)
        for (i in seq_len(length(Fac))) {
            Fac0[names(Fac[i])]<-Fac[names(Fac[i])]
        }
        asca <- ASCA.2f(X=X,Designa=Designa,Designb=Designb,Fac=Fac0,Join=Join,
            Interaction=Interaction)
    }

    if (Factors==3) {
        Fac0 <- c(0,2,2,2,2,2,2,2)
        names(Fac0) <- c("Model.a","Model.b","Model.c","Model.ab","Model.ac",
            "Model.bc","Model.abc","Model.res")
        if (Join[1]) {
            names(Fac0)[4]<-c("Model.bab")
        }
        if (Join[2]) {
            names(Fac0)[5]<-c("Model.cac")
        }
        asca0 <- ASCA.3f(X=X,Designa=Designa,Designb=Designb,Designc=Designc,
            Fac=Fac0,Join=Join,Interaction=Interaction)
        Fac<-ARSyNcomponents(asca0,Variability=Variability,beta=beta)
        for (i in seq_len(length(Fac))) {
            Fac0[names(Fac[i])]<-Fac[names(Fac[i])]
        }
        asca <- ASCA.3f(X=X,Designa=Designa,Designb=Designb,Designc=Designc,
            Fac=Fac0,Join=Join,Interaction=Interaction)
    }

    Input <- asca$Input
    asca$Input <- c(Input,Factors)
    names(asca$Input) <- c(names(Input),"Factors")

    output <- asca
    output
}

################################################################################

################################################################################

# ARSyNseq.R

ARSyNseq <- function(data,factor=NULL,batch=FALSE,norm="rpkm",logtransf=FALSE,
    Variability=0.75,beta=2) {

    Join <- TRUE
    Interaction <- TRUE

    dat <- as.matrix(assayData(data)$exprs)
    long <- featureData(data)@data[rownames(dat),"Length"]
    if (is.null(long)) 
        long <- 1000

    if (norm == "rpkm") {
        dat <- noirpkm(dat,long=long,k=0,lc=1)
    }

    if (norm == "uqua") {
        dat <- noiuqua(dat,long=long,lc=1,k=0)
    }

    if (norm == "tmm") {
        dat <- noitmm(dat,long=long,lc=1,k=0)      
    }

    if (!logtransf)   dat <- log(dat + 1)

    X <- t(dat)         #conditions x genes

    #----------------------------------

    if (is.null(factor)) {
        Covariates <- t(pData(data))
        Num.factors <- nrow(Covariates)
        labels.factors <- rownames(Covariates)
        Design <- list(NULL,NULL,NULL)
        for (i in seq_len(Num.factors)) {
            x <- as.character(Covariates[i,])
            Design[[i]] <- make.ASCA.design(x)
        } 
    }
    else {
        Covariates <- pData(data)[,factor]
        Num.factors <- 1
        labels.factors <- factor
        Design <- list(NULL,NULL,NULL)
        x <- as.character(Covariates)
        Design[[1]] <- make.ASCA.design(x)
    }

    ####################################
    ### --- Execute ASCAmodel 
    ####################################

    my.asca <- ARSyNmodel(Factors=Num.factors,X=X,Designa=Design[[1]],
        Designb=Design[[2]],Designc=Design[[3]],Join=Join,
        Interaction=Interaction,Variability=Variability,beta=beta)

    #################################### 
    ### --- Writing filtered matrix 
    ####################################

    X.filtered <- X
    M <- length(my.asca)-1

    if (!batch) {
        X.filtered <- X.filtered-my.asca[[M]]$TP
    }

    if(batch) {
        X.filtered <- X.filtered-my.asca[[1]]$TP 
    }

    data.filtered <- t(X.filtered)

    if (!logtransf)
        data.filtered <- exp(data.filtered)+1
    
    exprs(data) <- data.filtered

    return(data)
}

################################################################################

# ASCA1f.R

ASCA.1f <- function(X=X,Designa=Designa,Designb=NULL,Designc=NULL,Fac=c(1,2),
    Join=NULL,Interaction=NULL) {

    n <- ncol(X)
    p <- nrow(X)
    I <- ncol(Designa)

    Faca <- Fac[1] # number components Model a (time)
    Facres <- Fac[2] # number components Residues

    #----------------------- Calculate Overall Mean ----------------------------

    offset <- apply(X,2,mean)
    Xoff <- X-(cbind(matrix(1,nrow=p,ncol=1))%*%rbind(offset))

    #-----------------------  PART I: Submodel a (TIME) ------------------------

    Model.a<-ASCAfun1(Xoff,Designa,Faca)
    Xres<-Xoff-Model.a$X

    #------------------------Collecting models ---------------------------------

    models <- ls(pattern="Model")
    output <- vector(mode="list")
    Xres <- Xoff
    for (i in seq_len(length(models))) {
        mymodel <- get(models[i], envir=environment())
        output <- c(output, list(mymodel))
        Xres <- Xres - mymodel$X
        rm(mymodel)
        gc()
    }
    names(output) <- models

    #------------------------- PART III: Submodel res --------------------------

    Model.res <- ASCAfun.res(Xres,Facres)

    Model.res <- list(Model.res)
    names(Model.res) <- c("Model.res")
    output <- c(output,Model.res)

    #------------------------- Add Input data to the Output --------------------

    Input <- list(X,Designa,Designb,Designc,Fac,Join,Interaction)
    names(Input)<-c("X", "Designa", "Designb", "Designc", "Fac", "Join",
        "Interaction")
    Input <- list(Input)
    names(Input) <- "Input"
    output <- c(output,Input)
    
    output
}

################################################################################

################################################################################

# ASCA2f.R

ASCA.2f <- function(X=X,Designa=Designa,Designb=Designb,Designc=NULL,
    Fac=c(1,2,2,2),Join = TRUE,Interaction=TRUE) {

    n <- ncol(X)
    p <- nrow(X)
    I <- ncol(Designa)
    J <- ncol(Designb)

    Faca <- Fac[1] # number components Model a (time)
    Facb <- Fac[2] # number components Model b  (second factor)
    Facab <- Fac[3] # number components Model ab (interaction) 
    Facres <- Fac[4] # number components Residues

    #----------------------- Calculate Overall Mean ----------------------------

    offset <- apply(X,2,mean)
    Xoff <- X-(cbind(matrix(1,nrow=p,ncol=1))%*%rbind(offset))

    #-----------------------  PART I: Submodel a (TIME) ------------------------

    Model.a <- ASCAfun1(Xoff,Designa,Faca)
    Xres <- Xoff-Model.a$X

    #-------------------------- PART II: Submodel b and ab ---------------------

    if (!Join) {
        Model.b<-ASCAfun1(Xoff,Designb,Facb)
        if (Interaction) {
            Model.ab<-ASCAfun2(Xoff,Designa,Designb,Facab)
        }
    }

    if (Join) {
        Model.bab <- ASCAfun12(Xoff,Designa,Designb,Facab)
    }

    #------------------------Collecting models ---------------------------------

    models <- ls(pattern="Model")
    output <- vector(mode="list")
    Xres <- Xoff
    for (i in seq_len(length(models))) {
        mymodel <- get(models[i],envir=environment())
        output <- c(output,list(mymodel))
        Xres <- Xres - mymodel$X
        rm(mymodel)
        gc()    
    }

    names(output) <- models

    #------------------------- PART III: Submodel res --------------------------

    Model.res <- ASCAfun.res(Xres,Facres)

    Model.res <- list(Model.res)
    names(Model.res) <- c("Model.res")
    output <- c(output,Model.res)

    #------------------------- Add Input data to the Output --------------------

    Input <- list(X,Designa,Designb,Designc,Fac,Join,Interaction,Xoff)
    names(Input) <- c("X","Designa","Designb","Designc","Fac","Join",
        "Interaction","Xoff")
    Input <- list(Input)
    names(Input) <- "Input"
    output <- c(output,Input)

    output
}

################################################################################

################################################################################

# ASCA3f.R

ASCA.3f <- function(X=X,Designa=Designa,Designb=Designb,Designc=Designc,
    Fac=c(1,2,2,2,2,2,2,2),Join=c(TRUE,TRUE),
    Interaction=c(TRUE,TRUE,TRUE,TRUE)) {

    n <- ncol(X)
    p <- nrow(X)
    I <- ncol(Designa)
    J <- ncol(Designb)
    K <- ncol(Designc)

    Faca <- Fac[1]# number components Model a (time)
    Facb <- Fac[2] # number components Model b  (second factor)
    Facc <- Fac[3] # number components Model c  (third factor)
    Facab <- Fac[4] # number components Model ab (interaction) 
    Facac <- Fac[5] # number components Model ac (interaction) 
    Facbc <- Fac[6] # number components Model bc (interaction) 
    Facabc <- Fac[7]  # number components Model abc (interaction) 
    Facres <- Fac[8]
    
    #----------------------- Calculate Overall Mean ----------------------------

    offset <- apply(X,2,mean)
    Xoff <- X-(cbind(matrix(1,nrow=p,ncol=1))%*%rbind(offset))

    #-----------------------  PART I: Submodel a (TIME) ------------------------

    Model.a <- ASCAfun1(Xoff,Designa,Faca)
    Xres <- Xoff-Model.a$X

    #-------------------------- PART II.1: Submodel b.ab------------------------
    if(!Join[1]) {
        Model.b <- ASCAfun1(Xoff,Designb,Facb)
        if (Interaction[1]) {
            Model.ab <- ASCAfun2(Xoff,Designa,Designb,Facab)
        }
    }

    if(Join[1]) {
        Model.bab<-ASCAfun12(Xoff,Designa,Designb,Facab)
    }
    
    #-------------------------- PART II.2: Submodel (c.ac) ---------------------
    if(!Join[2]) {
        Model.c <- ASCAfun1(Xoff,Designc,Facc)
        if (Interaction[2]) {
            Model.ac<-ASCAfun2(Xoff,Designa,Designc,Facac)
        }
    }

    if(Join[2]) {
        Model.cac<-ASCAfun12(Xoff,Designa,Designc,Facac)
    }
    
    #-------------------------- PART II.3: Submodel (bc) -----------------------

    if (Interaction[3]) {
        Model.bc<-ASCAfun2(Xoff,Designb,Designc,Facbc)
    }
    
    #-------------------------- PART II.4: Submodel (abc) ----------------------

    if (Interaction[4]) {
        Model.abc<-ASCAfun.triple(Xoff,Designa,Designb,Designc,Facabc)
    }
    #------------------------Collecting models ---------------------------------

    models <- ls(pattern="Model")
    output <- vector(mode="list")
    Xres <- Xoff
    for (i in seq_len(length(models))) {
        mymodel <- get(models[i], envir=environment())
        output <- c(output, list(mymodel))
        Xres <- Xres - mymodel$X
        rm(mymodel)
        gc()
    }
    names(output) <- models

    #------------------------- PART III: Submodel res --------------------------

    Model.res <- ASCAfun.res(Xres,Facres)

    Model.res <- list(Model.res)
    names(Model.res) <- c("Model.res")
    output <- c(output,Model.res)

    #------------------------- Add Input data to the Output --------------------

    Input <- list(X, Designa, Designb, Designc, Fac, Join,Interaction,Xoff)
    names(Input) <- c("X","Designa","Designb","Designc","Fac","Join",
        "Interaction","Xoff")
    Input <- list(Input)
    names(Input) <- "Input"
    output<-c(output,Input)

    output
}

################################################################################

################################################################################

# ASCAfun1.R

ASCAfun1 <- function(X,Design,Fac) {
    n <- ncol(X) # number of genes
    I <- ncol(Design) # number of levels in the factor

    NK <- NULL
    XK <- matrix(NA,nrow=I,ncol=n)

    for (i in seq_len(I)) {
        sub <- X[Design[,i]==1,]
        if (is.null(nrow(sub))) { #when there isn't replicates
            NK[i] <- 1
            XK[i,] <- sub 
        }
        else {
        NK[i]<-nrow(sub)
        XK[i,]<-apply(sub,2,mean) }
    }
    NK<-sqrt(NK)

    # Weigh the data of the Submodel with the corresponding number of 
    # measurement occasions

    XKw <- NK*XK

    PCA <- PCA.GENES(XKw)
    scw <- PCA$scores[,seq_len(Fac)]
    ld <- PCA$loadings[,seq_len(Fac)]
    ssq <- PCA$var.exp
    if (Fac==1) {
        scw <- as.matrix(scw)
        ld <- as.matrix(ld)
    }      
    if (Fac==0) {
        scw<-as.matrix(rep(0,I))
        ld<-as.matrix(rep(0,n))
    }        
    
    # Re-weigth the scores
    sc <- scw/NK
    XKrec <- sc%*%t(ld)

    Xa <- NULL
    TPa <- NULL
    for (i in seq_len(nrow(X))) {
        position<-which(Design[i,]==1)
        Xa<-rbind(Xa,XK[position,])
        TPa<-rbind(TPa,XKrec[position,])
    }

    Ea <- Xa-TPa

    #Leverage & SPE 
    leverage<-apply(ld^2,1,sum)
    SPE<-apply(Ea^2,2,sum)

    output <- list(XK,sc,ld,ssq,Xa,TPa,Ea,leverage,SPE)
    names(output) <- c("data","scores","loadings","var.exp","X","TP","E",
        "leverage","SPE")
    
    output
}

################################################################################

################################################################################

# ASCAfun2.R

ASCAfun2 <- function(X,Desa,Desb,Fac) {
    n <- ncol(X) # number of genes
    I <- ncol(Desa) # number of levels in the factor TIME
    J <- ncol(Desb) # number of levels in the other factor

    XK1 <- matrix(NA,nrow=I,ncol=n)

    for (i in seq_len(I)) {
        sub <- X[Desa[,i]==1,]
        if (is.null(nrow(sub))) { #when there isn't replicates
            XK1[i,] <- sub 
        }
        else {
            XK1[i,] <- apply(sub,2,mean) 
        }
    }

    XK2 <- matrix(NA,nrow=J,ncol=n)

    for (j in seq_len(J)) {
        sub <- X[Desb[,j]==1,]
        if (is.null(nrow(sub))) { #when there isn't replicates
            XK2[j,] <- sub 
        }
        else {
            XK2[j,]<-apply(sub,2,mean) 
        }
    }

    NK <- matrix(NA,nrow=I,ncol=J)
    XK <- matrix(NA,nrow=I*J,ncol=n)

    k=1
    for (j in seq_len(J)){
        for (i in seq_len(I)) {
            sub <- X[(Desa[,i]+Desb[,j])==2,]
            if (is.null(nrow(sub))) {  #when there isn't replicates
                NK[i,j] <- 1
                XK[k,] <- sub-XK1[i,]-XK2[j,]
            }
            else {
                NK[i,j] <- sqrt(nrow(sub))
                XK[k,] <- apply(sub,2,mean)-XK1[i,]-XK2[j,] 
            }
            k <- k+1
        }
    }

    XKw <- XK*(as.numeric(NK))

    PCA <- PCA.GENES(XKw)
    scw <- PCA$scores[,seq_len(Fac)]
    ld <- PCA$loadings[,seq_len(Fac)]
    ssq <- PCA$var.exp
    if (Fac==1) {
        scw <- as.matrix(scw)
        ld <- as.matrix(ld) 
    }
    if (Fac==0) {
        scw <- as.matrix(rep(0,I*J))
        ld <- as.matrix(rep(0,n))
    }
    
    # Re-weigth the scores
    sc <- scw/(as.numeric(NK))
    
    XKrec <- sc%*%t(ld)

    Xab <- NULL
    TPab <- NULL

    for (i in seq_len(nrow(X))) {
        position1 <- which(Desa[i,]==1)
        position2 <- which(Desb[i,]==1)
        Xab <- rbind(Xab,XK[I*(position2-1)+position1,])
        TPab <- rbind(TPab,XKrec[I*(position2-1)+position1,])
    }
    
    Eab <- Xab-TPab

    leverage <- apply(ld^2,1,sum)
    SPE <- apply(Eab^2,2,sum)

    output <- list(XK,sc,ld,ssq,Xab,TPab,Eab,leverage,SPE)
    names(output)<-c("data","scores","loadings","var.exp","X","TP","E",
        "leverage","SPE")
    
    output
}

################################################################################

################################################################################

# ASCAfun12.R

ASCAfun12 <- function (X,Desa,Desb,Fac) {
    n <- ncol(X) # number of genes
    I <- ncol(Desa) # number of levels in the factor TIME
    J <- ncol(Desb) # number of levels in the other factor

    XK1 <- matrix(NA,nrow=I,ncol=n)

    for (i in seq_len(I)) {
        sub <- X[Desa[,i]==1,]
        XK1[i,] <- apply(sub,2,mean)
    }
    
    NK <- matrix(NA,nrow=I,ncol=J)
    XK <- matrix(NA,nrow=I*J,ncol=n)

    k <- 1
    for (j in seq_len(J)) {
        for (i in seq_len(I)) {
            sub <- X[(Desa[,i]+Desb[,j])==2,]
            if (is.null(nrow(sub))) {  #when there isn't replicates
                NK[i,j] <- 1
                XK[k,] <- sub-XK1[i,]
            }
            else {
                NK[i,j] <- sqrt(nrow(sub))
                XK[k,] <- apply(sub,2,mean)-XK1[i,] 
            }
            k <- k+1
        }
    }

    XKw <- XK*(as.numeric(NK))

    PCA <- PCA.GENES(XKw)
    scw <- PCA$scores[,seq_len(Fac)]
    ld <- PCA$loadings[,seq_len(Fac)]
    ssq <- PCA$var.exp

    if (Fac==1) {
        scw <- as.matrix(scw)
        ld <- as.matrix(ld) 
    }
    if (Fac==0) {
        scw <- as.matrix(rep(0,I*J))
        ld <- as.matrix(rep(0,n))
    }
    
    # Re-weigth the scores
    sc <- scw/(as.numeric(NK))
    
    XKrec <- sc%*%t(ld)

    Xab <- NULL
    TPab <- NULL
    for (i in seq_len(nrow(X))){
        position1 <- which(Desa[i,]==1)
        position2 <- which(Desb[i,]==1)
        Xab <- rbind(Xab,XK[I*(position2-1)+position1,])
        TPab <- rbind(TPab,XKrec[I*(position2-1)+position1,])
    }
    Eab <- Xab-TPab

    #Leverage & SPE 
    leverage <- apply(ld^2,1,sum)
    SPE <- apply(Eab^2,2,sum)
    output <- list(XK,sc,ld,ssq,Xab,TPab,Eab,leverage,SPE)
    names(output) <- c("data","scores","loadings","var.exp","X","TP","E",
        "leverage","SPE")
    
    output
}

################################################################################

################################################################################

# ASCAfunres.R

ASCAfun.res <- function (X,Fac) {
    PCA <- PCA.GENES(X)
    sc <- PCA$scores[,seq_len(Fac)]
    ld <- PCA$loadings[,seq_len(Fac)]
    ssq <- PCA$var.exp
    if (Fac==1) {
        sc <- as.matrix(sc)
        ld <- as.matrix(ld)
    }
    TPres<-sc%*%t(ld)
    if (Fac==0) {
        sc <- 0
        ld <- 0
        TPres<-matrix(0,nrow(X),ncol(X))
    }      
    Eres<-X-TPres
    output <- list(sc,ld,ssq,X,TPres,Eres)
    names(output) <- c("scores","loadings","var.exp","X","TP","E")
    output
}

################################################################################

################################################################################

# ASCAfun-triple.R

ASCAfun.triple <- function(X,Desa,Desb,Desc,Fac) {

    n <- ncol(X) # number of genes
    I <- ncol(Desa) # number of levels in the factor TIME
    J <- ncol(Desb) # number of levels in the other factor
    H <- ncol(Desc) # number of levels in the other factor

    #Matrices con medias efectos individuales
    XK1 <- matrix(NA,nrow=I,ncol=n)
    for (i in seq_len(I)) {
        sub <- X[Desa[,i]==1,]
        XK1[i,] <- apply(sub,2,mean)
    }

    XK2 <- matrix(NA,nrow=J,ncol=n)
    for (j in seq_len(J)) {
        sub <- X[Desb[,j]==1,]
        XK2[j,] <- apply(sub,2,mean)
    }

    XK3 <- matrix(NA,nrow=H,ncol=n)
    for (h in seq_len(H)) {
        sub <- X[Desc[,h]==1,]
        XK3[h,] <- apply(sub,2,mean)
    }

    #Matrices con medias de efectos simples

    XK12 <- matrix(NA,nrow=I*J,ncol=n)
    k <- 1
    for (j in seq_len(J)) {
        for (i in seq_len(I)) {
            sub <- X[(Desa[,i]+Desb[,j])==2,]
            XK12[k,] <- apply(sub,2,mean)
            k <- k+1
        }
    }

    XK13 <- matrix(NA,nrow=I*H,ncol=n)
    k <- 1
    for (h in seq_len(H)) {
        for (i in seq_len(I)) {
            sub <- X[(Desa[,i]+Desc[,h])==2,]
            XK13[k,] <- apply(sub,2,mean)
            k <- k+1
        }
    }

    XK23 <- matrix(NA,nrow=J*H,ncol=n)
    k <- 1
    for (h in seq_len(H)){
        for (j in seq_len(J)){
            sub <- X[(Desb[,j]+Desc[,h])==2,]
            XK23[k,] <- apply(sub,2,mean)
            k <- k+1
        }
    }

    NK <- matrix(NA,nrow=I,ncol=J*H)
    XK <- matrix(NA,nrow=I*J*H,ncol=n)
    k <- 1

    for (h in seq_len(H)) {
        for (j in seq_len(J)) {
            for (i in seq_len(I)) {
                sub <- as.matrix(rbind(X[(Desa[,i]+Desb[,j]+Desc[,h])==3,]))
                NK[i,(h-1)*J+j] <- sqrt(nrow(sub))
                XK[k,] <- apply(sub,2,mean)+XK1[i,]+
                    XK2[j,]+XK3[h,]-XK12[(j-1)*I+i,]-XK13[(h-1)*I+i,]-
                    XK23[(h-1)*J+j,]
                k <- k+1
            }
        }
    }

    XKw <- XK*(as.numeric(NK))

    PCA <- PCA.GENES(XKw)
    scw <- PCA$scores[,seq_len(Fac)]
    ld <- PCA$loadings[,seq_len(Fac)]
    ssq <- PCA$var.exp
    if (Fac==1) {
        scw <- as.matrix(scw)
        ld <- as.matrix(ld) 
    }
    if (Fac==0) {
        scw <- as.matrix(rep(0,I*J*H))
        ld <- as.matrix(rep(0,n))
    }
    # Re-weigth the scores
    sc <- scw/(as.numeric(NK))
    
    XKrec <- sc%*%t(ld)

    Xabc <- NULL
    TPabc <- NULL

    for (i in seq_len(nrow(X))){
        position1 <- which(Desa[i,]==1)
        position2 <- which(Desb[i,]==1)
        position3 <- which(Desc[i,]==1)
        Xabc <- rbind(Xabc,XK[I*(position2-1)+I*J*(position3-1)+position1,])
        TPabc <- rbind(TPabc,XKrec[I*(position2-1)+
            I*J*(position3-1)+position1,])
    }
    Eabc <- Xabc-TPabc

    #Leverage & SPE 
    leverage <- apply(ld^2,1,sum)
    SPE <- apply(Eabc^2,2,sum)

    output <- list(XK,sc,ld,ssq,Xabc,TPabc,Eabc,leverage,SPE)
    names(output) <- c("data","scores","loadings","var.exp","X","TP","E",
        "leverage","SPE")
    
    output
}

################################################################################

################################################################################

# auxiliar.R

busca <- function (x,S) {
    which(S[,1] == x[1] & S[,2] == x[2])
}

int.mult <- function(lista,todos=NULL) {
    if (is.null(todos)) {
        todos <- unlist(lista)
    }
    comunes <- todos
    for (i in seq_len(length(lista))) {
        comunes <- intersect(comunes,lista[[i]])
    }
    comunes
}

n.menor <- function (x,S1,S2) {
    length(which(S1 <= x[1] &  S2 <= x[2]))
}

noceros <- function (x,num=TRUE,k=0) {
    nn <- length(which(x > k))
    if (num) {
        nn 
    } 
    else {
        if(nn > 0) { 
            which(x > k) 
        } 
        else { 
            NULL 
        }
    }
}

sinceros <- function (datos,k) {
    datos <- as.matrix(datos)
    datos0 <- as.matrix(datos)
    
    if (is.null(k)) {
        mini0 <- min(datos[noceros(datos,num=FALSE,k=0)])
        kc <- mini0/2
        datos0[datos0 == 0] <- kc
    } 
    else {
        datos0[datos0 == 0] <- k
    }
    datos0
}

sim.samples <- function(counts1,counts2=NULL,pnr=1,nss=5,v=0.02) {
    seqdep <- c(sum(counts1),sum(counts2))
    num.reads1 <- (pnr + c(-v,v))*seqdep[1]   
    muestras <- vector("list")
    muestras$c1 <- NULL
    for (s in seq_len(nss)) {
        tama <- round(runif(1,num.reads1[1],num.reads1[2]),0)
        muestras$c1 <- cbind(muestras$c1,rmultinom(1,size=tama,prob=counts1))
    }
    
    if (!is.null(counts2)) {
        num.reads2 <- (pnr + c(-v,v))*seqdep[2]
        muestras$c2 <- NULL
        for (s in seq_len(nss)) {
            tama <- round(runif(1,num.reads2[1],num.reads2[2]),0)
            muestras$c2 <- cbind(muestras$c2,
                rmultinom(1,size=tama,prob=counts2))
        }
    }
    muestras
}

ranking <- function(results) {
    M <- results$M
    D <- results$D
    prob <- results$prob
    
    ## Changing NA by 0
    M[is.na(M)] <- 0
    D[is.na(D)] <- 0
    prob[is.na(prob)] <- 0
    
    
    ## Ranking
    ranking5 <- sqrt(M*M + D*D)*sign(M)
    
    ## Ranking results
    theranking <- data.frame(rownames(results),ranking5)
    rownames(theranking) <- NULL
    colnames(theranking) <- c("ID","statistic")
    
    theranking
}

plot.y2 <- function(x,yright,yleft,yrightlim=range(yright,na.rm=TRUE),
    yleftlim=range(yleft,na.rm=TRUE),xlim=range(x,na.rm=TRUE),xlab=NULL,
    yylab=c("",""),lwd=c(2,2),pch=c(1,2),col=c(1,2),type=c("o","o"),
    linky=TRUE,smooth=0,bg=c("white","white"),lwds=1,length=10,...,x2=NULL,
    yright2=NULL,yleft2=NULL,col2=c(3,4)) {

    ## Plotting RIGHT axis data
    plot(x,yright,axes=FALSE,ylab="",xlab=xlab,ylim=yrightlim,xlim=xlim,
        pch=pch[1],type=type[1],lwd=lwd[1],col=col[1],...)

    axis(4,pretty(yrightlim,length),col=1,col.axis=1)

    if (is.null(yright2) == FALSE) {
        points(x2,yright2,type=type[1],pch=pch[1],lwd=lwd[1],col=col2[1],...)
    }

    if (smooth != 0) 
        lines(supsmu(x,yright,span=smooth),col=col[1],lwd=lwds,...)

    if (yylab[1]=="") {
        mtext(deparse(substitute(yright)),side=4,outer=FALSE,line=2,col=1,...)
    } 
    else {
        mtext(yylab[1],side=4,outer=FALSE,line=2,col=1,...)
    }

    par(new=TRUE)

    ## Plotting LEFT axis data
    plot(x,yleft,axes=FALSE,ylab="" ,xlab=xlab,ylim=yleftlim,xlim=xlim,bg=bg[1],
        pch=pch[2],type=type[2],lwd=lwd[2],col=col[2],...)
    box()
    axis(2,pretty(yleftlim,length),col=1,col.axis=1)

    if (is.null(yleft2) == FALSE) {
        points(x2,yleft2,type=type[2],pch=pch[2],bg=bg[2],lwd=lwd[2],
            col=col2[2],...)
    }

    if (smooth != 0) 
        lines(supsmu(x,yleft,span=smooth),col=col[2],lwd=lwds,...)

    if(yylab[2] == "") {
        mtext(deparse(substitute(yleft)),side=2,outer=FALSE,line=2,col=1,...)
    } 
    else {
        mtext(yylab[2],side=2,outer=FALSE,line=2,col=1,...)
    }

    ## X-axis
    axis(1,at=pretty(xlim,length))
}

logscaling <- function(data,base=2,k=1) {
    logmaximo <- round(max(data,na.rm=TRUE),0)
    numceros <- nchar(logmaximo)-1
    etiquetas <- c(0,10^(seq_len(numceros)))
    donde <- log(etiquetas + k,base=base)
    data <- log(data + k,base=base)
    list("data"=data,"at"=donde,"labels"=etiquetas)
}

miscolores <- colors()[c(554,89,111,512,17,586,132,428,601,568,86,390)]

################################################################################

################################################################################

# biodetection.plot.R

biodetection.dat <- function(input,factor=NULL,k=0) {
    if (inherits(input,"eSet") == FALSE)
        stop("Error. The input data must be an eSet object\n")

    if (any(!is.na(featureData(input)@data$Biotype)) == FALSE)
        stop ("No biological classification was provided.\nPlease run ",
            "addData() function to add this information\n")

    if (!is.null(assayData(input)$exprs)) {
        dat <- as.matrix(assayData(input)$exprs)
        mysamples <- colnames(assayData(input)$exprs)
    } 
    else {
        dat <- as.matrix(assayData(input)$counts)
        mysamples <- colnames(assayData(input)$counts)
    }
    
    numgenes <- nrow(dat)

    if (is.null(factor)) {  # per sample  
        cat("Biotypes detection is to be computed for:\n")
        print(colnames(dat))
        biotablas <- vector("list",length=NCOL(dat))
        names(biotablas) <- colnames(dat)
    } 
    else {  # per condition
        mifactor <- pData(input)[,factor]
        niveles <- levels(mifactor)
        cat("Biotypes detection is to be computed for:\n")
        print(niveles)
        biotablas <- vector("list",length=length(niveles))
        names(biotablas) <- niveles
        dat <- vapply(niveles,
            function (k) rowSums(as.matrix(dat[,grep(k,mifactor)])),numeric(1))
    }

    infobio <- as.character(featureData(input)@data$Biotype)

    genome <- 100*table(infobio)/sum(table(infobio))
    ordre <- order(genome,decreasing=TRUE)

    for (i in seq_len(length(biotablas))) {
        detect <- dat[,i] > k
        perdet1 <- genome*table(infobio,detect)[names(genome),"TRUE"]/
            table(infobio)[names(genome)]
        perdet2 <- 100*table(infobio,detect)[names(genome),"TRUE"] /
            sum(table(infobio,detect)[,"TRUE"])
        biotablas[[i]] <- as.matrix(rbind(perdet1[ordre],perdet2[ordre]))
        rownames(biotablas[[i]]) <- c("detectionVSgenome","detectionVSsample")
    }

    mybiotable <- list("genome"=genome[ordre],"biotables"=biotablas,
        "genomesize"=numgenes)

    mybiotable
}

biodetection.plot <- function(dat,samples=c(1,2),
    plottype=c("persample","comparison"),toplot="protein_coding",
    toreport=FALSE,...) {

    mypar=par(no.readonly=TRUE)
    plottype=match.arg(plottype)

    if (length(samples) > 2) {
        stop("ERROR: This function cannot generate plots for more than 2 ",
            "samples.\nPlease,use it as many times as needed to generate the ",
            "plots for all your samples.\n")
    }
    
    if (is.numeric(samples)) 
        samples=names(dat$biotables)[samples]
    biotable1 <- rbind(dat$genome,dat$biotables[[samples[1]]],
        rep(0,length(dat$genome)))

    # Computing ylim for left and right axis         
    if (ncol(biotable1) >= 3) {
        ymaxL <- ceiling(max(biotable1[,c(1,2,3)],na.rm=TRUE))
        ymaxR <- max(biotable1[,-c(1,2,3)],na.rm=TRUE)                
    } 
    else {
        ymaxL <- ceiling(max(biotable1,na.rm=TRUE))
        ymaxR=0 
    }

    if (length(samples) == 2) {
        biotable2 <- rbind(dat$genome,dat$biotables[[samples[2]]],
            rep(0,length(dat$genome)))
        if (ncol(biotable2) >= 3) {
            ymax2 <- ceiling(max(biotable2[,c(1,2,3)],na.rm=TRUE))
            ymax2sin <- max(biotable2[,-c(1,2,3)],na.rm=TRUE)
            ymaxR <- ceiling(max(ymaxR,ymax2sin))
        } 
        else {
            ymax2 <- ceiling(max(biotable2,na.rm=TRUE))            
        }
        ymaxL=max(ymaxL,ymax2)     
    }

    # Rescaling biotables (datos2)
    if (length(samples) == 2) {                
        if (ncol(biotable2) >= 3) 
            biotable2[,-c(1,2,3)] <- biotable2[,-c(1,2,3)]*ymaxL/ymaxR
    }

    # Rescaling biotables (datos1)    
    if (ncol(biotable1) >= 3) 
        biotable1[,-c(1,2,3)] <- biotable1[,-c(1,2,3)]*ymaxL/ymaxR

    ## PLOTS

    if (length(samples) == 1) {     # Plot (1 sample) - 2 scales                
        par(mar=c(11,4,2,2))
        barplot(biotable1[c(1,3),],main=samples[1],xlab=NULL,ylab="%features",
            axis.lty=1,legend=FALSE,beside=TRUE,col=c("grey",2),las=2,
            ylim=c(0,ymaxL),border=c("grey",2))
        barplot(biotable1[c(2,4),],main=samples[1],xlab=NULL,ylab="%features",
            axis.lty=1,legend=FALSE,beside=TRUE,col=c(2,1),las=2,density=30,
            ylim=c(0,ymaxL),border=2,add=TRUE)

        # if number of biotypes >= 3 so we have left and right axis
        if (ymaxR > 0) {
            axis(side=4,at=pretty(c(0,ymaxL),n=5),
                labels=round(pretty(c(0,ymaxL),n=5)*ymaxR/ymaxL,1))
            abline(v=9.5,col=3,lwd=2,lty=2)                
        }        
        
        legend(x="topright",bty="n",horiz=FALSE,fill=c("grey",2,2),
            density=c(NA,30,NA),border=c("grey",2,2),
            legend=c("% in genome","detected","% in sample"))
    } 
    else {     # Plot (2 samples)
        par(mar=c(11,4,2,2))
        if (plottype == "persample") {
            barplot(biotable1[c(1,3),],main=samples[1],xlab=NULL,
                ylab="%features",axis.lty=1,legend=FALSE,beside=TRUE,
                col=c("grey",2),las=2,ylim=c(0,ymaxL),border=c("grey",2))
            barplot(biotable1[c(2,4),],main=samples[1],xlab=NULL,
                ylab="%features",axis.lty=1,legend=FALSE,beside=TRUE,
                col=c(2,1),las=2,density=30,ylim=c(0,ymaxL),border=2,add=TRUE)
            
            # if number of biotypes >= 3 so we have left and right axis
            if (ymaxR > 0) {
                axis(side=4,at=pretty(c(0,ymaxL),n=5),
                    labels=round(pretty(c(0,ymaxL),n=5)*ymaxR/ymaxL,1))
                abline(v=9.5,col=3,lwd=2,lty=2)
            }
            
            legend(x="topright",bty="n",horiz=FALSE,fill=c("grey",2,2),
                density=c(NA,30,NA),border=c("grey",2,2),
                legend=c("% in genome","detected","% in sample"))

            # Datos2                
            barplot(biotable2[c(1,3),],main=samples[2],xlab=NULL,
                ylab="%features",axis.lty=1,legend=FALSE,beside=TRUE,
                col=c("grey",4),las=2,ylim=c(0,ymaxL),border=c("grey",4))
            barplot(biotable2[c(2,4),],main=samples[2],xlab=NULL,
                ylab="%features",axis.lty=1,legend=FALSE,beside=TRUE,col=c(4,1),
                las=2,density=30,ylim=c(0,ymaxL),border=4,add=TRUE)
            
            # if number of biotypes >= 3 so we have left and right axis
            if (ymaxR > 0) {
                axis(side=4,at=pretty(c(0,ymaxL),n=5),
                    labels=round(pretty(c(0,ymaxL),n=5)*ymaxR/ymaxL,1))
                abline(v=9.5,col=3,lwd=2,lty=2)
            }

            legend(x="topright",bty="n",horiz=FALSE,fill=c("grey",4,4),
                density=c(NA,30,NA),border=c("grey",4,4),
                legend=c("% in genome","detected","% in sample"))
        }
        
        # A plot comparing two samples with regard to genome and for % in sample
        if (plottype == "comparison") {
            lefttable <- rbind(100*dat$biotables[[samples[1]]][1,]/dat$genome,
                100*dat$biotables[[samples[2]]][1,]/dat$genome)
            righttable <- rbind(dat$biotables[[samples[1]]][2,],
                dat$biotables[[samples[2]]][2,])

            if (length(toplot) > 1) {
                toplot <- toplot[1]
                print("WARNING: More than one biotype was provided, the ",
                    "proportion test will only by applied to the first ",
                    "biotype.")
            }
            
            if ((toplot != 1) && (toplot != "global")) {
                numgenes <- dat$genomesize
                myx <- round(righttable[,toplot]*numgenes/100,0)
                mytest <- prop.test(x=myx,n=rep(numgenes,2),
                    alternative="two.sided")
                if (is.numeric(toplot)) 
                    toplot <- colnames(righttable)[toplot]
            }
            
            asumar <- colSums(righttable)
            asumar <- which(asumar < 0.25)
            if (length(asumar) > 1) {
                righttable <- cbind(righttable[,-asumar],
                    rowSums(righttable[,asumar]))
                colnames(righttable)[ncol(righttable)] <- "Others"
            }

            # Detection in the genome
            bbb <- barplot(lefttable,main="Biotype detection over genome total",
                xlab=NULL,ylab="% detected features",axis.lty=1,legend=FALSE,
                cex.names=0.8,beside=TRUE,col=c(2,4),las=2,density=80,
                border=c(2,4),ylim=c(0,100))
            bbb <- colSums(bbb)/2
            lines(bbb,dat$genome,pch=20,type="o",lwd=2)

            # %detection in the sample                
            barplot(righttable,main="Relative biotype abundance in sample",
                xlab=NULL,ylab="Relative % biotypes",axis.lty=1,legend=FALSE,
                beside=TRUE,col=c(2,4),las=2,border=c(2,4))

            legend(x="topright",bty="n",horiz=FALSE,pch=c(15,15,20),
                lwd=c(NA,NA,1),legend=c(samples,"% in genome"),col=c(2,4,1))

            if ((toplot != 1) && (toplot != "global")) {
                print(paste("Percentage of",toplot,"biotype in each sample:"))
                names(mytest$estimate) <- samples
                print(round(mytest$estimate*100,4))
                print(paste("Confidence interval at 95% for the difference of",
                    "percentages:",samples[1],"-",samples[2]))
                print(round(mytest$conf.int[c(1,2)]*100,4))
                if (mytest$p.value < 0.05) {
                    print(paste("The percentage of this biotype is",
                        "significantly DIFFERENT for these two samples",
                        "(p-value =",signif(mytest$p.value,4),")."))
                } 
                else {
                    print(paste("The percentage of this biotype is NOT",
                        "significantly different for these two samples",
                        "(p-value =",signif(mytest$p.value,4),")."))
                }
            }
        }
    }
    
    # Reset with the default values
    if (!toreport) 
        par(mypar)
}

################################################################################

################################################################################

# cd.plot.R

cd.dat <- function (input,norm=FALSE,refColumn=1) {
    if (inherits(input,"eSet") == FALSE)
        stop("ERROR: The input data must be an eSet object.\n")
    
    if (!is.null(assayData(input)$exprs)) {
        if (ncol( assayData(input)$exprs) < 2)
            stop("ERROR: The input object should have at least two samples.\n")

        datos <- assayData(input)$exprs
    } 
    else {
        if (ncol(assayData(input)$counts) < 2)
            stop("ERROR: The input object should have at least two samples.\n")
        datos <- assayData(input)$counts
    }

    ceros = which(rowSums(datos) == 0)
    hayceros = (length(ceros) > 0)

    if (hayceros) {
        print(paste("Warning:",length(ceros),"features with 0 counts in all",
            "samples are to be removed for this analysis."))
        datos <- datos[-ceros,]
    }
    
    ## scaling data and/or changing 0 to k
    if (norm)
        datos <- sinceros(datos, k = NULL)
    else
        datos <- noirpkm(datos,long=1000,lc=1,k=0.5)

    ## to plot
    data2plot <- log2(datos/datos[,refColumn])

    if (is.numeric(refColumn)) 
        refColumn <- colnames(datos)[refColumn]
    
    print(paste("Reference sample is:",refColumn))

    MsinRef <- as.matrix(data2plot[,-match(refColumn, colnames(data2plot))])
    colnames(MsinRef) <- colnames(data2plot)[-match(refColumn,
        colnames(data2plot))]
    alpha <- 0.05
    alpha <- alpha/ncol(MsinRef)
    nperm <- 10^3

    bootmed <- vapply(seq_len(nperm),function(k) {
        permut <- sample(seq_len(nrow(MsinRef)),replace=TRUE,nrow(MsinRef))
        permut <- as.matrix(MsinRef[permut,])
        permut <- apply(permut,2,median)   
        permut
    },numeric(ncol(MsinRef)))

    if (is.null(dim(bootmed))) 
        bootmed <- t(as.matrix(bootmed))

    bootmed <- t(apply(bootmed,1,quantile,probs=round(c(alpha/2,1-alpha/2),4)))
    diagno <- apply(bootmed,1,function (x) {
        ddd <- (x[1] <= 0) * (0 <= x[2])
        if (ddd == 1)
            ddd = "PASSED" 
        else
            ddd = "FAILED"
        ddd
    })
    bootmed <- cbind(bootmed,diagno)

    rownames(bootmed) <- colnames(MsinRef)
    colnames(bootmed)[3] <- "Diagnostic Test"
    print("Confidence intervals for median of M:")
    print(bootmed)

    if ("FAILED" %in% bootmed[,3])
        print(paste0("Diagnostic test: FAILED. Normalization is required ",
            "to correct this bias."))
    else
        print("Diagnostic test: PASSED.")

    list("data2plot"=data2plot,"refColumn"=refColumn,"DiagnosticTest"=bootmed)
}

cd.plot <- function (dat,samples=NULL,...) {
    refColumn <- dat$refColumn
    dat <- dat$data2plot
    if (is.null(samples)) 
        samples <- seq_len(ncol(dat))
    if (is.numeric(samples))
        samples <- colnames(dat)[samples]
    samples <- setdiff(samples,refColumn)

    if(length(samples) > 12) 
        stop("Please select 12 samples or less to be plotted (excluding ",
            "reference).")
    
    dat <- as.matrix(dat[,samples])
    dat.dens <- apply(dat,2,density,adjust=1.5)
    limY <- c(0,max(vapply(dat.dens,
        function(x) max(x$y,na.rm=TRUE),numeric(1))))

    plot(dat.dens[[1]],xlab="M = log2(sample/refsample)",ylab="Density",
        lwd=2,ylim=limY,type="l",col=miscolores[1],
        main=paste("Reference sample:",refColumn),...)
    abline(v=median(dat[,1],na.rm=TRUE),col=miscolores[1],lty=2)
    if (length(samples) > 1) {
        for (i in 2:length(samples)) {
            lines(dat.dens[[i]],col=miscolores[i],lwd=2)
            abline(v=median(dat[,i],na.rm=TRUE),col=miscolores[i],lty=i+1)
        }
    }
    legend("topleft",legend=samples,
        text.col=miscolores[seq_len(length(samples))],bty="n",lty=1,lwd=2,
        col=miscolores[seq_len(length(samples))])
}

################################################################################

################################################################################

# countsbio.plot.R

countsbio.dat <- function (input,biotypes=NULL,factor=NULL,norm=FALSE)  {
    if (inherits(input,"eSet") == FALSE)
        stop("Error. You must give an eSet object\n")

    if (!is.null(assayData(input)$exprs))
        datos <- assayData(input)$exprs
    else
        datos <- assayData(input)$counts

    depth <- round(colSums(datos)/10^6,1)
    names(depth) <- colnames(datos)
    ceros <- which(rowSums(datos) == 0)
    hayceros <- length(ceros) > 0
        
    if (hayceros) {
        warning("Warning:",length(ceros),"features with 0 counts in all ",
            "samples are to be removed for this analysis.")
        datos0=datos[-ceros,,drop=FALSE]
    } 
    else { 
        datos0=datos
    }
    
    nsam <- NCOL(datos)
    
    if (nsam == 1) {
        datos <- as.matrix(datos)
        datos0 <- as.matrix(datos0)
    }

    # Per condition
    if (is.null(factor)) {    # per sample    
        print("Count distributions are to be computed for:")
        print(colnames(datos))
    } 
    else {    # per condition
        mifactor <- pData(input)[,factor]
        niveles <- levels(mifactor)
        print("Counts per million distributions are to be computed for:")
        print(niveles)
        if (norm) {
            datos <- vapply(niveles,function (k) { 
                rowMeans(as.matrix(datos[,grep(k,mifactor)]))
            },numeric(1))
            datos0 <- vapply(niveles,function (k) { 
                rowMeans(as.matrix(datos0[,grep(k,mifactor)]))
            },numeric(1))
        } 
        else {
            datos <- vapply(niveles,function (k) { 
                10^6*rowMeans(t(t(datos[,grep(k,mifactor)])/colSums(as.matrix(
                    datos[,grep(k,mifactor)]))))
            },numeric(1))
            datos0 <- vapply(niveles,function (k) { 
                10^6*rowMeans(t(t(datos0[,grep(k,mifactor)])/colSums(as.matrix(
                    datos0[,grep(k,mifactor)]))))
            },numeric(1))            
        }
        colnames(datos) <- colnames(datos0) <- niveles
        depth <- vapply(niveles,function (k) {
            paste(range(depth[grep(k,mifactor)]),collapse="-")
        },character(1))
    }

    # Biotypes
    if (!is.null(featureData(input)$Biotype)) {
        if (hayceros) {
            infobio0 <- as.character(featureData(input)$Biotype)[-ceros] 
        } 
        else { 
            infobio0 <- as.character(featureData(input)$Biotype) 
        }
        infobio <- as.character(featureData(input)$Biotype)
    } 
    else { 
        infobio0 <- NULL 
        infobio <- NULL 
    }

    if (!is.null(infobio)) {        
        if(is.null(biotypes)) {
            biotypes <- unique(infobio)
            names(biotypes) <- biotypes        
        }     
        # which genes belong to each biotype
        biog <- lapply(biotypes,function(x) { 
            which(is.element(infobio0,x)) 
        })
        names(biog) <- biotypes
        bionum <- c(NROW(datos0),vapply(biog,length,integer(1)))
        names(bionum) <- c("global",names(biotypes)) 
        bio0 <- which(bionum == 0)
        if (length(bio0) > 0) 
            bionum <- bionum[-bio0]
    } 
    else { 
        biotypes <- NULL
        bionum <- NULL 
    }
    
    # Create the summary matrix information
    if (is.null(bionum)) {
        resumen <- vector("list",length=1)
        names(resumen) <- "global"
    } 
    else {
        resumen <- vector("list",length=length(bionum))
        names(resumen) <- names(bionum)
    }

    cuentas <- c(0,1,2,5,10)

    if (is.null(factor)) {
        if (norm) {
            datosCPM <- datos
        } 
        else {
            datosCPM=10^6 * t(t(datos)/colSums(as.matrix(datos)))
        }
    } else { datosCPM=datos }

    for (i in seq_len(length(resumen))) {
        if (i == 1) {
            datosR=datosCPM             
        } else {             
            if(!is.null(infobio)) {
                datosR <- datosCPM[which(infobio == names(resumen)[i]),]
                if (!is(datosR,"matrix")) {
                    datosR <- t(as.matrix(datosR))
                }
            }
        }

        nfeatures <- nrow(datosR)
        datosR <- datosR[which(rowSums(datosR) > 0),]
        if (!is(datosR,"matrix")) {
            datosR <- t(as.matrix(datosR)) 
        }

        myglobal <- NULL
        mypersample <- NULL

        for (kk in seq_len(length(cuentas))) {
            mypersample <- rbind(mypersample,apply(datosR,2,function (x) { 
                length(which(x > cuentas[kk])) 
            }))
            myglobal <- c(myglobal,sum(apply(datosR,1,function (x) { 
                max(x) > cuentas[kk] 
            })))
        } 

        mypersample <- round(100*mypersample/nfeatures,1)
        mypersample <- rbind(mypersample,depth)
        rownames(mypersample) <- seq_len(nrow(mypersample))
        myglobal <- c(round(100*myglobal/nfeatures,1),nfeatures)

        resumen[[i]] <- data.frame(c(paste("CPM >",cuentas),"depth"),
            mypersample,"total"=myglobal)
        colnames(resumen[[i]])[1] <- names(resumen)[i]
        colnames(resumen[[i]])[2:(ncol(resumen[[i]])-1)] <- colnames(datosR)
    }

    ## results
    cosas <- list("result"=datos0,"bionum"=bionum,"biotypes"=infobio0,
        "summary"=resumen)
    
    cosas
}

countsbio.plot <- function(dat,samples=c(1,2),toplot="global",
    plottype=c("barplot","boxplot"),toreport=FALSE,...) {

    # dat: Data coming from countsbio.dat function
    # samples: Samples to be plotted. If NULL,all samples are plotted.
    # toplot: Name of biotype (including "global") to be plotted.

    mypar <- par(no.readonly=TRUE)
    plottype <- match.arg(plottype)

    ## Preparing data
    if (is.null(samples)) { 
        if (NCOL(dat$result) == 1) {
            samples=1
        } else {
            samples <- seq_len(NCOL(dat$result)) 
        }        
    }
    if (is.numeric(toplot)) 
        toplot <- names(dat$summary)[toplot]
    if (is.numeric(samples) && !is.null(colnames(dat$result))) 
        samples <- colnames(dat$result)[samples]

    if (plottype == "barplot") {
        if ((exists("ylab") && !is.character(ylab)) || !exists("ylab")) 
            ylab <- ""

        datos <- dat$summary[[toplot]]
        mytotal <- as.numeric(datos[,"total"])
        datos <- as.matrix(datos[,samples])
        rownames(datos) <- as.character(dat$summary[[toplot]][,1])

        par(mar=c(6,4,4,2))
        barplot(as.numeric(datos[1,]),col=miscolores[1],las=2,main="",ylab="",
            density=70,ylim=c(0,100),cex.axis=0.8,names.arg="",...)
        for (i in 2:(length(mytotal)-2)) {
            barplot(as.numeric(datos[i,]),col=miscolores[i],las=2,main="",
                ylab="",add=TRUE,density=70,ylim=c(0,100),cex.axis=0.8,
                names.arg="",...)
        }
        
        bp <- barplot(as.numeric(datos[(length(mytotal)-1),]),
            col=miscolores[(length(mytotal)-1)],las=2,
            main=paste(toupper(toplot)," (",mytotal[length(mytotal)],")",
            sep=""),ylab="Sensitivity (%)",add=TRUE,names.arg=colnames(datos),
            cex.axis=0.8,density=70,ylim=c(0,100),cex.names=0.8,...)
        
        for (j in seq_len((length(mytotal)-1))) 
            abline(h=mytotal[j],col=miscolores[j],lwd=2)
        if (length(samples) <= 10) {
            mtext(side=3,text=datos["depth",],adj=0.5,at=bp,cex=0.8)
        } 
        else {
            mtext(side=3,text=datos["depth",],at=bp,cex=0.7,las=2)
        }
        legend("top",rownames(datos)[-length(mytotal)],fill=miscolores,
            density=70,bty="n",ncol=3)
        par(mar=c(5,4,4,4) + 0.1)
    }

    if (plottype == "boxplot") {
        conteos <- as.matrix(dat$result[,samples])
        if (is.numeric(samples)) 
            colnames(conteos) <- colnames(dat$result)[samples]
        else 
            colnames(conteos) <- samples
        num <- dat$bionum[toplot]
        if (is.null(num)) {
            if (toplot == "global") {
                num=nrow(conteos)
            } else {
                num=0
            }
        } 
        infobio <- dat$biotypes

        if (num == 0 && toplot != "global") 
            stop("Error: No data available. Please,change toplot parameter.")

        if ((exists("ylab") && !is.character(ylab)) || !exists("ylab"))
            ylab <- "Expression values"

        ## Plots            
        if (length(samples) == 1) {
            escala <- logscaling(conteos,base=2)
            if (is.null(infobio)) {
                boxplot(escala$data,col=miscolores[1],ylab=ylab,main="",
                    yaxt="n",...)
            } 
            else {
                par(mar=c(10,4,4,2))    
                boxplot(escala$data ~ infobio,col=miscolores,ylab=ylab,
                    main=colnames(conteos),las=2,cex.axis=0.8,cex.lab=0.9,
                    yaxt="n",...)
                cuantos <- dat$bionum[-1]
                cuantos <- cuantos[sort(names(cuantos))]
                mtext(cuantos,3,at=seq_len(length(cuantos)),cex=0.6,las=2)
            }
        } 
        else {
            if (toplot != "global") 
                conteos=conteos[which(infobio == toplot),]
            
            escala <- logscaling(conteos,base=2)
            main <- paste(toupper(toplot)," (",num,")",sep="")

            par(mar=c(6,4,2,2))    
            boxplot(escala$data,col=miscolores,ylab=ylab,main=main,las=2,
                cex.lab=0.9,cex.axis=0.8,yaxt="n",...)
        }        
        axis(side=2,at=escala$at,labels=escala$labels)
    }

    if (!toreport)
        par(mypar)
}

################################################################################

################################################################################

# dat.R

dat <- function(input,type = c("biodetection","cd","countsbio","GCbias",
    "lengthbias","saturation","PCA"),k=0,ndepth=6,factor=NULL,norm=FALSE,
    refColumn=1,logtransf = FALSE) {

    type <- match.arg(type)
    if (type == "biodetection") {
        output <- new("Biodetection",
            dat=biodetection.dat(input,factor=factor,k=k))        
    }
    
    if (type == "cd") {
        output <- new("CD",dat=cd.dat(input,norm=norm,refColumn=refColumn))
    }

    if (type == "countsbio") {
        output <- new("CountsBio",
            dat=countsbio.dat(input,factor=factor,norm=norm))
    }

    #if (type == "GCbias") {
    #    output <- new("GCbias",
    #        dat=GC.dat(input,factor=factor,norm=norm))
    #}

    #if (type == "lengthbias") {
    #    output <- new("lengthbias",
    #        dat=length.dat(input,factor=factor,norm=norm))
    #}
    
    if (type == "saturation") {
        output <- new("Saturation",dat=saturation.dat(input,k=k,ndepth=ndepth))
    }
    
    #if (type == "PCA") {
    #    output=new("PCA",dat=PCA.dat(input,norm=norm,logtransf=logtransf))
    #}
    
    output
}

################################################################################

################################################################################

# DE.plot.R

DE.plot <- function(output,q=NULL,graphic=c("MD","expr","chrom","distr"),
    pch=20,cex=0.5,col=1,pch.sel=1,cex.sel=0.6,col.sel=2,log.scale=TRUE,
    chromosomes=NULL,join=FALSE,...) {
    mypar <- par(no.readonly=TRUE)

    if (!is(output,"Output"))
        stop("Error. Output argument must contain an object generated by ",
            "noiseq or noiseqbio functions.\n")

    graphic <- match.arg(graphic)    
    noiseqbio <- "theta" %in% colnames(output@results[[1]])[c(1,2,3,4)]
    
    ## MD plot
    if (graphic == "MD") {
        if (noiseqbio) {   
            M <- output@results[[1]][,"log2FC"]
            D <- abs(output@results[[1]][,1] - output@results[[1]][,2])+1
            names(M) <- names(D) <- rownames(output@results[[1]])
            
            plot(M,D,pch=pch,xlab="M",ylab="D",cex=cex,col=col,log="y",...)
            
            if(!is.null(q)) {
                mySelection <- rownames(degenes(output,q))
                points(M[mySelection],D[mySelection],col=col.sel,pch=pch.sel,
                    cex=cex.sel)
            }
        }
        else {
            plot(output@results[[1]][,"M"],(1+output@results[[1]][,"D"]),
                pch=pch,xlab="M",ylab="D",cex=cex,col=col,log="y",...)
            if (!is.null(q)) {
                mySelection <- rownames(degenes(output,q))
                points(output@results[[1]][mySelection,"M"],
                    output@results[[1]][mySelection,"D"]+1,col=col.sel,
                    pch=pch.sel,cex=cex.sel)
            }
        }
    }
    
    ## Expression plot: Condition1 vs Condition2
    else if (graphic == "expr") {
        data <- cbind(output@results[[1]][1],output@results[[1]][2])
        rownames(data) <- rownames(output@results[[1]])
        colnames(data) <- colnames(output@results[[1]][c(1,2)])

        if (log.scale) {
            k <- min(data,na.rm=TRUE)
            escala <- logscaling(data,base=2,k=k)
            
            plot(escala$data,pch=pch,cex=cex,col=col,yaxt="n",xaxt="n",...)
            axis(side=1,at=escala$at,labels=escala$labels)
            axis(side=2,at=escala$at,labels=escala$labels)
            
            if(!is.null(q)) {
                mySelection <- rownames(degenes(output,q))    
                points(escala$data[mySelection,],col=col.sel,pch=pch.sel,
                    cex=cex.sel)
            }
        } 
        else {
            plot(data,pch=pch,cex=cex,col=col,...)            
            if(!is.null(q)) {
                mySelection <- rownames(degenes(output,q))
                points(data[mySelection,],col=col.sel,pch=pch.sel,cex=cex.sel)
            }
        }
    }
    
    ## MANHATTAN PLOT
    else if (graphic == "chrom") {
        mydata <- data.frame(as.character(output@results[[1]][,"Chrom"]),
            output@results[[1]][,c("GeneStart","GeneEnd")],
            output@results[[1]][,c(1,2)])
        mydata <- na.omit(mydata)
        colnames(mydata) <- c("chr","start","end",colnames(mydata)[-c(1,2,3)])
        
        if (is.null(chromosomes)) {    # todos los cromosomas
            chromosomes <- unique(mydata$chr)
            chromosomes <- sort(chromosomes)
        }
        
        print("REMEMBER. You are plotting these chromosomes and in this order:")
        print(chromosomes)
        
        # logarithmic scale
        if (log.scale) {
            if (min(mydata[,-c(1,2,3)],na.rm=TRUE) < 1) {
                kk <- -min(mydata[,-c(1,2,3)],na.rm=TRUE)+1
            } 
            else { 
                kk <- 0 
            }
            mydata[,-c(1,2,3)] <- log2(mydata[,-c(1,2,3)]+kk)
        }
        
        # Selecting chromosomes and ordering positions
        ordenat <- NULL
        for (cromo in chromosomes) {
            myselec <- mydata[mydata[,"chr"] == cromo,]
            myselec <- myselec[order(myselec[,"start"]),]
            ordenat <- rbind(ordenat,myselec)
        }
        
        sel.ord <- NULL
        
        if (!is.null(q)) {
            # up-regulated in first condition
            cond1 <- rownames(degenes(output,q,M="up"))
            # up-regulated in second condition
            cond2 <- rownames(degenes(output,q,M="down"))
            cond1 <- (rownames(ordenat) %in% cond1)*1
            cond2 <- (rownames(ordenat) %in% cond2)*1
            sel.ord <- cbind(cond1,cond2)
            rownames(sel.ord) <- rownames(ordenat)
        }
        
        chr.long <- aggregate(ordenat[,"end"],
            by=list(as.character(ordenat[,"chr"])),max)
        chr.long <- chr.long[match(chromosomes,chr.long[,1]),]
        
        if (join) { # si todos los cromosomas van en el mismo plot
            total.long <- sum(as.numeric(chr.long$x))
            chr.start <- cumsum(c(1,chr.long$x[-length(chr.long$x)]))
            names(chr.start) <- chr.long[,1]
            plot(c(1,total.long),c(-max(ordenat[,5]),max(ordenat[,4])),
                type="n",xlab="",ylab="Expression data",xaxt="n")
            axis(side=1,at=chr.start,labels=chr.long[,1],font=2)
            abline(h=0,lty=2,lwd=0.5)        
            
            for (ch in chromosomes) {
                dat.chr <- ordenat[which(ordenat[,"chr"] == ch),]
                dat.chr[,c("start","end")] <- dat.chr[,c("start","end")] +
                    chr.start[ch] - 1
                rect(xleft=dat.chr[,"start"],ybottom=0,xright=dat.chr[,"end"],
                    ytop=dat.chr[,4],col="grey",border=NA)
                rect(xleft=dat.chr[,"start"],ybottom=-dat.chr[,5],
                    xright=dat.chr[,"end"],ytop=0,col="grey",border=NA)

                if (!is.null(q)) {                    
                    aux <- which(rownames(sel.ord) %in% rownames(dat.chr))
                    sel.chr1 <- dat.chr[,4]*sel.ord[aux,1]
                    sel.chr2 <- -dat.chr[,5]*sel.ord[aux,2]
                    rect(xleft=dat.chr[,"start"],ybottom=0,
                        xright=dat.chr[,"end"],ytop=sel.chr1,col=2,border=NA)
                    rect(xleft=dat.chr[,"start"],ybottom=sel.chr2,
                        xright=dat.chr[,"end"],ytop=0,col=3,border=NA)
                }

                segments(x0=dat.chr[,"start"],y0=0,x1=dat.chr[,"end"],y1=0,
                    col=4,lwd=0.5) # annotated genes
            }
            text(c(1,1),0.9*c(max(ordenat[,4]),-max(ordenat[,5])),
                colnames(mydata)[4:5],font=3,adj=0,col=2:3)
            # a plot for each chromosome
        } 
        else {        
            num.chr <- length(chromosomes)
            k <- 20
            long.prop <- round((chr.long$x / min(chr.long$x))*k,0)
            while (max(long.prop)*num.chr > 500 | max(long.prop) > 50) {
                k <- k-1
                long.prop <- round((chr.long$x / min(chr.long$x))*k,0)
            }
            forlayout <- matrix(0,num.chr,max(long.prop))
            for (i in seq_len(num.chr)) {
                forlayout[i,seq_len(long.prop[i])] <- i
            }
            layout(forlayout)
            
            if (num.chr > 1)
                par(mar=c(2,4.1,0.1,0.1))
            miylim <- c(-max(ordenat[,5],na.rm=TRUE),
                max(ordenat[,4],na.rm=TRUE))
            
            for (i in seq_len(num.chr)) {
                apintar <- ordenat[which(ordenat[,"chr"] == chromosomes[i]),2:5]
                plot(c(1,chr.long[i,2]),miylim,type="n",xlab="",
                    ylab=chromosomes[i],xaxt="n",font.lab=2,cex.lab=1.3)
                rect(xleft=apintar[,"start"],ybottom=0,xright=apintar[,"end"],
                    ytop=apintar[,3],col="grey",border=NA)
                rect(xleft=apintar[,"start"],ybottom=-apintar[,4],
                    xright=apintar[,"end"],ytop=0,col="grey",border=NA)
                
                if (!is.null(q)) {
                    aux <- which(rownames(sel.ord) %in% rownames(apintar))
                    asel <- sel.ord[aux,]*apintar[,3:4]
                    rect(xleft=apintar[,"start"],ybottom=0,
                        xright=apintar[,"end"],ytop=asel[,1],col=2,border=NA)
                    rect(xleft=apintar[,"start"],ybottom=-asel[,2],
                        xright=apintar[,"end"],ytop=0,col=3,border=NA)
                }            
                abline(h=0,lty=2,lwd=0.5)
                segments(x0=apintar[,"start"],y0=0,x1=apintar[,"end"],y1=0,
                    col=4,lwd=0.5) # annotated genes
                etiq <- mypretty(c(1,chr.long[i,2]),n=10)
                axis(side=1,at=etiq,labels=etiq)
                text(c(1,1),0.9*miylim,colnames(mydata)[5:4],font=3,adj=0,
                    col=3:2)
            }
        }
    }
    
    ## DEG distribution across biotypes/chromosomes
    else if (graphic == "distr") {        
        if(!is.null(q)) { # Computing DEG
            mySelection <- rownames(degenes(output,q))
            detect <- rownames(output@results[[1]]) %in% mySelection
        }
        else {
            stop("You must specify a valid value for q\n")
        }

        if ("Chrom" %in% colnames(output@results[[1]])) {
            numplot <- 1
            infobio <- output@results[[1]][,"Chrom"]
            genome <- 100*table(infobio)/sum(table(infobio))
            ordre <- order(genome,decreasing=TRUE)            
            perdet1 <- genome*table(infobio,detect)[names(
                genome),2]/table(infobio)[names(genome)]
            perdet2 <- 100*table(infobio,detect)[names(
                genome),2]/sum(table(infobio,detect)[,2])
            ceros <- rep(0,length(genome))
            biotable1 <- as.matrix(rbind(genome[ordre],perdet1[ordre],
                perdet2[ordre],ceros))
            rownames(biotable1) <- c("genome","degVSgenome","deg","ceros")
            if (!is.null(chromosomes))
                biotable1=biotable1[,chromosomes]
            ymaxL1 <- ceiling(max(biotable1,na.rm=TRUE))
        } 
        else { 
            numplot <- 0 
        }
        
        if ("Biotype" %in% colnames(output@results[[1]])) {
            numplot <- c(numplot,1)
            infobio <- output@results[[1]][,"Biotype"]
            genome <- 100*table(infobio)/sum(table(infobio))
            ordre <- order(genome,decreasing=TRUE)            
            perdet1 <- genome*table(infobio,detect)[names(
                genome),2]/table(infobio)[names(genome)]
            perdet2 <- 100*table(infobio,detect)[names(
                genome),2]/sum(table(infobio,detect)[,2])
            ceros <- rep(0,length(genome))
            biotable2 <- as.matrix(rbind(genome[ordre],perdet1[ordre],
                perdet2[ordre],ceros))
            rownames(biotable2) <- c("genome","degVSgenome","deg","ceros")
            higher2 <- which(biotable2[1,] > 2)
            lower2 <- which(biotable2[1,] <= 2)
            if (length(higher2) > 0) {       
                ymaxL2 <- ceiling(max(biotable2[,higher2],na.rm=TRUE)) 
                if (length(lower2) > 0) {
                    ymaxR2 <- ceiling(max(biotable2[,lower2],na.rm=TRUE))
                    biotable2[,lower2] <- biotable2[,lower2]*ymaxL2/ymaxR2
                }
                else { 
                    ymaxR2 <- ymaxL2 
                }
                
            } 
            else { 
                ymaxR2 <- ceiling(max(biotable2[,lower2],na.rm=TRUE))
                ymaxL2 <- ymaxR2                             
            }
        } 
        else { 
            numplot <- c(numplot,0) 
        }
        
        # Plot
        if (sum(numplot) == 0) 
            stop("Biotype or chromosome information is needed for this plot\n")
        
        if (sum(numplot) == 1) {    # 1 Plot 
            par(mar=c(10,4,2,2))
            if (numplot[1] == 1) {
                barplot(biotable1[c(1,3),],
                    main="DEG distribution across chromosomes",
                    xlab=NULL,ylab="%features",axis.lty=1,legend=FALSE,
                    beside=TRUE,col=c("grey",2),las=2,ylim=c(0,ymaxL1),
                    border=c("grey",2))
                barplot(biotable1[c(2,4),],
                    main="DEG distribution across chromosomes",
                    xlab=NULL,ylab="%features",axis.lty=1,legend=FALSE,
                    beside=TRUE,col=c(2,1),las=2,density=30,ylim=c(0,ymaxL1),
                    xborder=2,add=TRUE)
                legend(x="topright",bty="n",horiz=FALSE,fill=c("grey",2,2),
                    density=c(NA,30,NA),border=c("grey",2,2),
                    legend=c("%Chrom in genome","%DEG in Chrom",
                    "%Chrom in DEG"))
            }
            
            if (numplot[2] == 1) {
                barplot(biotable2[c(1,3),],
                    main="DEG distribution across biotypes",
                    xlab=NULL,ylab="%features",axis.lty=1,legend=FALSE,
                    beside=TRUE,col=c("grey",4),las=2,ylim=c(0,ymaxL2),
                    border=c("grey",4))
                barplot(biotable2[c(2,4),],
                    main="DEG distribution across biotypes",
                    xlab=NULL,ylab="%features",axis.lty=1,legend=FALSE,
                    beside=TRUE,col=c(4,1),las=2,density=30,ylim=c(0,ymaxL2),
                    border=4,add=TRUE)
                axis(side=4,at=pretty(c(0,ymaxL2),n=5),
                    labels=round(pretty(c(0,ymaxL2),n=5)*ymaxR2/ymaxL2,1))
                if (ymaxR2 != ymaxL2) {
                    abline(v=3*length(higher2) + 0.5,col=3,lwd=2,lty=2)
                }
                legend(x="topright",bty="n",horiz=FALSE,fill=c("grey",4,4),
                    density=c(NA,30,NA),border=c("grey",4,4),
                    legend=c("%Biotype in genome","%DEG in Biotype",
                    "%Biotype in DEG"))
            }
        }
        
        if (sum(numplot) == 2) {    # 2 Plots 
            par(mar=c(10,4,2,2),mfrow=c(1,2))
            
            # Chromosomes
            barplot(biotable1[c(1,3),],
                main="DEG distribution across chromosomes",
                xlab=NULL,ylab="%features",axis.lty=1,legend=FALSE,beside=TRUE,
                col=c("grey",2),las=2,ylim=c(0,ymaxL1),border=c("grey",2))
            barplot(biotable1[c(2,4),],
                main="DEG distribution across chromosomes",xlab=NULL,
                ylab="%features",axis.lty=1,legend=FALSE,beside=TRUE,col=c(2,1),
                las=2,density=30,ylim=c(0,ymaxL1),border=2,add=TRUE)
            legend(x="topright",bty="n",horiz=FALSE,fill=c("grey",2,2),
                density=c(NA,30,NA),border=c("grey",2,2),
                legend=c("%Chrom in genome","%DEG in Chrom","%Chrom in DEG"))
            
            # Biotypes
            barplot(biotable2[c(1,3),],main="DEG distribution across biotypes",
                xlab=NULL,ylab="%features",axis.lty=1,legend=FALSE,beside=TRUE,
                col=c("grey",4),las=2,ylim=c(0,ymaxL2),border=c("grey",4))
            barplot(biotable2[c(2,4),],main="DEG distribution across biotypes",
                xlab=NULL,ylab="%features",axis.lty=1,legend=FALSE,beside=TRUE,
                col=c(4,1),las=2,density=30,ylim=c(0,ymaxL2),border=4,add=TRUE)
            axis(side=4,at=pretty(c(0,ymaxL2),n=5),
                labels=round(pretty(c(0,ymaxL2),n=5)*ymaxR2/ymaxL2,1))
            if (ymaxR2 != ymaxL2) {
                abline(v=3*length(higher2) + 0.5,col=3,lwd=2,lty=2)
            }        
            legend(x="topright",bty="n",horiz=FALSE,fill=c("grey",4,4),
                density=c(NA,30,NA),border=c("grey",4,4),
                legend=c("%Biotype in genome","%DEG in Biotype",
                    "%Biotype in DEG"))
        }
    }
    par(mypar)
}

## Auxiliar function
mypretty <- function(x,n=5) {    
    mywidth <- diff(x)
    mybin <- ceiling(mywidth/(n-1))
    mylabels0 <- x[1] + mybin*(0:(n-1))
    ndig <- nchar(as.character(mylabels0))
    mylabels <- mylabels0
    k <- 0
    for (i in 2:(n-1)) {
        mylabels[i] <- (round(mylabels0[i]/10^(ndig[i]-1),k))*10^(ndig[i]-1)
        while (mylabels[i] == mylabels[i-1]) {
            k <- k+1
            mylabels[i] <- (round(mylabels0[i]/10^(ndig[i]-1),k))*10^(ndig[i]-1)
        }
    }
    mylabels
}

################################################################################

################################################################################

# degenes.R

degenes <- function (object,q=0.95,M=NULL) {
    if (!is(object,"Output"))
        stop("You must give the object returned by the noiseq function\n")

    x <- object@results[[1]]
    noiseqbio="theta" %in% colnames(x)[seq_len(4)]

    if (noiseqbio) {
        y <- na.omit(x[c("theta","prob")])
        colnames(y)[1]="M"
    } else {
        y <- na.omit(x[c("M","D","prob")])
    }

    if (is.null(M)) {
        losdeg <- y[y[,"prob"] > q,]
        print(paste(dim(losdeg)[1],"differentially expressed features"))

    }
    else if (M == "up") {
        estos <- y[y[,"M"] > 0,]
        losdeg <- estos[estos[,"prob"] > q,]
        print(paste(dim(losdeg)[1],
            "differentially expressed features (up in first condition)"))
    } 
    else if (M == "down") {
        estos <- y[y[,"M"] < 0,]
        losdeg <- estos[estos[,"prob"] > q,]
        print(paste(dim(losdeg)[1],
            "differentially expressed features (down in first condition)"))
    }
    else {
        stop("ERROR! Value for parameter M is not valid. Please,choose ",
            "between NULL,'up' or 'down'")
    }
    
    # Restore the object with the same "results" structure
    losdeg <- x[rownames(losdeg),]
    losdeg[order(losdeg[,"prob"],decreasing=TRUE),]
}

################################################################################

################################################################################

# fewrepliactes.R

share.info = function (mydata, n1, n2, r, nclust)   {
    # clustering data by k-means algorithm
    # 1.a) Normalized data
    gc()
    cl <- suppressWarnings(kmeans(mydata,nclust,nstart=25,iter.max=nclust + 30))

    cat("...k-means clustering done\n")
    cat(paste("Size of",nclust,"clusters:\n"))
    print(cl$size)
                        
    # Creating pseudo-data
    cluster.data <- lapply(seq_len(nclust),function (k) { 
        mydata[cl$cluster == k,]
    })
        
    # Resampling
    npermu <- cl$size*r
    npermu <- vapply(npermu,function (x) min(x,1000),numeric(1))

    cat("Resampling cluster...")

    myres=vector("list",length=nclust)

    for (i in seq_len(nclust)) {
        print(i)

        #if (cl$size[i] > 1) {     # OPTION 2.A
        # OPTION 2.C: small clusters
        if (cl$size[i] > 1 && cl$size[i] < 1000) {
            #myres[[i]]=t(sapply(seq_len(npermu[i]),function (j) {
            #    permu=sample(cluster.data[[i]])
            #    nn1=n1*cl$size[i]
            #    nn2=n2*cl$size[i]
            #    mean1=mean(permu[seq_len(nn1)])
            #    mean2=mean(permu[(nn1+1):(nn1+nn2)])
            #    sd1=sd(as.numeric(permu[seq_len(nn1)]))
            #    sd2=sd(as.numeric(permu[(nn1+1):(nn1+nn2)]))
            #    data.frame("M"=log2(mean1/mean2),"D"=mean1-mean2,
            #        "M.sd"=sqrt(sd1^2/(mean1^2*log(2)^2*nn1) + 
            #        sd2^2/(mean2^2*log(2)^2*nn2)),
            #        "D.sd"=sqrt(sd1^2/sqrt(nn1) + sd2^2/sqrt(nn2)))
            #}))
            myres[[i]]=t(vapply(seq_len(npermu[i]),function (j) {
                permu=sample(cluster.data[[i]])
                nn1=n1*cl$size[i]
                nn2=n2*cl$size[i]
                mean1=mean(permu[seq_len(nn1)])
                mean2=mean(permu[(nn1+1):(nn1+nn2)])
                sd1=sd(as.numeric(permu[seq_len(nn1)]))
                sd2=sd(as.numeric(permu[(nn1+1):(nn1+nn2)]))
                data.frame("M"=log2(mean1/mean2),"D"=mean1-mean2,
                    "M.sd"=sqrt(sd1^2/(mean1^2*log(2)^2*nn1) + 
                    sd2^2/(mean2^2*log(2)^2*nn2)),
                    "D.sd"=sqrt(sd1^2/sqrt(nn1) + sd2^2/sqrt(nn2)))
            },vector("list",4)))
        }
        
        if (cl$size[i] >= 1000) {    # OPTION 2.C & 2.D: big clusters
            # Option 2.D: clustering big clusters
            cl2 <- kmeans(cluster.data[[i]],nclust,nstart=25,
                iter.max=nclust + 20)
            cat(paste("Size of",nclust,"subclusters of cluster:",i)); cat("\n")
            print(cl2$size)
            subcluster.data <- lapply(seq_len(nclust),function (k) { 
                cluster.data[[i]][cl2$cluster == k,] 
            })
            npermu2 <- cl2$size*r
            npermu2 <- vapply(npermu2,function (x) min(x,1000),numeric(1))
            ## modified to reduce the number of permutations to be done
            myres2 <- vector("list",length=nclust)
            
            for (h in seq_len(nclust)) {
                if (cl2$size[h] > 1) {
                    #myres2[[h]] <- t(sapply(seq_len(npermu2[h]),function (j) {
                    #    permu=sample(subcluster.data[[h]])
                    #    nn1 <- n1*cl2$size[h]
                    #    nn2 <- n2*cl2$size[h]             
                    #    mean1 <- mean(permu[seq_len(nn1)])
                    #    mean2 <- mean(permu[(nn1+1):(nn1+nn2)])
                    #    sd1 <- sd(as.numeric(permu[seq_len(nn1)]))
                    #    sd2 <- sd(as.numeric(permu[(nn1+1):(nn1+nn2)]))
                    #    data.frame("M"=log2(mean1/mean2),"D"=mean1-mean2,
                    #        "M.sd"=sqrt(sd1^2/(mean1^2*log(2)^2*nn1) +
                    #        sd2^2/(mean2^2*log(2)^2*nn2)),
                    #        "D.sd"=sqrt(sd1^2/sqrt(nn1) + sd2^2/sqrt(nn2)))
                    #}))
                    myres2[[h]] <- t(vapply(seq_len(npermu2[h]),function (j) {
                        permu=sample(subcluster.data[[h]])
                        nn1 <- n1*cl2$size[h]
                        nn2 <- n2*cl2$size[h]             
                        mean1 <- mean(permu[seq_len(nn1)])
                        mean2 <- mean(permu[(nn1+1):(nn1+nn2)])
                        sd1 <- sd(as.numeric(permu[seq_len(nn1)]))
                        sd2 <- sd(as.numeric(permu[(nn1+1):(nn1+nn2)]))
                        data.frame("M"=log2(mean1/mean2),"D"=mean1-mean2,
                            "M.sd"=sqrt(sd1^2/(mean1^2*log(2)^2*nn1) +
                            sd2^2/(mean2^2*log(2)^2*nn2)),
                            "D.sd"=sqrt(sd1^2/sqrt(nn1) + sd2^2/sqrt(nn2)))
                    },vector("list",4)))
                }
            }
            myres[[i]] <- do.call("rbind",myres2)
        }
    }                                        

    # Computing Zr for noise
    cat("Computing Z for noise...\n")

    # 4.A) a0: Global for all R*G permutations
    myres <- do.call("rbind",myres)
    
    a0.M <- quantile(as.numeric(myres[,"M.sd"]),probs=0.9,na.rm=TRUE)
    a0.D <- quantile(as.numeric(myres[,"D.sd"]),probs=0.9,na.rm=TRUE)
    
    M <- as.numeric(myres[,"M"])/(a0.M + as.numeric(myres[,"M.sd"]))
    D <- as.numeric(myres[,"D"])/(a0.D + as.numeric(myres[,"D.sd"]))
    (M + D)/2
}

################################################################################

# filter.low.counts.R

CV = function(data) { 
    100*sd(data,na.rm=TRUE)/mean(data,na.rm=TRUE)
}

filtered.data <- function(dataset,factor,norm=TRUE,depth=NULL,method=1,
    cv.cutoff=100,cpm=1,p.adj="fdr") {    
    dataset0=dataset[rowSums(dataset) > 0,]
    dataset=dataset0

    if ((method == 3) && (norm)) {
        if (is.null(depth)) {
            stop("ERROR: Sequencing depth for each column in dataset must ",
                "be provided.\n")
        }
        dataset <- t(t(dataset0)/(colSums(dataset0)/depth))
    }
    
    if ((method < 3) && (!norm)) {
        dataset <- 10^6*t(t(dataset0)/colSums(dataset0))
    }

    grupos <- unique(factor)
    cumple <- NULL

    cat("Filtering out low count features...\n")

    for (gg in grupos) {        
        datos <- as.matrix(dataset[,grep(gg,factor)])

        if (method == 1) {
            if (ncol(datos) == 1) {
                cumplecond=(datos > cpm)
            } 
            else {
                cumplecond <- 
                    (apply(datos,1,CV) < cv.cutoff)*(rowMeans(datos) > cpm)
                cumplecond[which(is.na(cumplecond) == TRUE)] <- 0 
            }
            cumple <- cbind(cumple,cumplecond)
        }

        if (method == 2) {
            if (ncol(datos) == 1) 
                stop("ERROR: At least 2 replicates per condition are ",
                    "required to apply this method.")
            mytest <- apply(datos,1,function (x) { 
                suppressWarnings(wilcox.test(x,alternative="greater",
                    conf.int=FALSE,mu=0))$"p.value" 
            })
            mytest <- p.adjust(mytest,method=p.adj)
            cumple <- cbind(cumple,1*(mytest < 0.05))    
        }
        
        if (method == 3) {                     
            p0=cpm/10^6                                
            mytest <- apply(datos,1,function (x) 
                suppressWarnings(prop.test(sum(x),n=sum(datos),p=p0,
                    alternative="greater"))$"p.value")
            mytest <- p.adjust(mytest,method=p.adj)
            cumple <- cbind(cumple,1*(mytest < 0.05))                     
        }
    }

    cumple <- which(rowSums(as.matrix(cumple)) >= 1) 

    cat(paste(length(cumple),"features are to be kept for differential ",
        "expression analysis with filtering method",method))
    cat("\n")

    dataset0[cumple,]
}

################################################################################

################################################################################

# makeASCAdesign.R

make.ASCA.design <- function(x) {
    x <- as.factor(x)
    levels <- unique(x)
    n <- length(x)
    p <- length(levels)
    Design <- matrix(0,nrow=n,ncol=p)
    for (i in seq_len(n)){
        for (j in seq_len(p)){
            if (x[i]==levels[j]) {
                Design[i,j]=1
            }
        }
    }
    colnames(Design)<-levels
    output<-Design
    output
}

################################################################################

################################################################################

# MD.R

MD <- function (dat=dat,selec=c(seq_len(nrow(dat)))) {
    pares <- as.matrix(combn(ncol(dat), 2))
    if (NCOL(pares) > 30) {  
        sub30 <- sample(seq_len(NCOL(pares)),size=30,replace=FALSE)
        pares <- pares[,sub30]
    }
    mm <- NULL
    dd <- NULL
    for (i in seq_len(ncol(pares))) {
        a <- dat[selec,pares[1,i]]
        b <- dat[selec,pares[2,i]]
        mm <- cbind(mm,log(a/b,2))
        dd <- cbind(dd,abs(a-b))
    }
    list("M"=mm,"D"=dd)
}

MDbio <- function (dat=dat,selec= c(seq_len(nrow(dat))),param=NULL,a0per=0.9) {
    pares <- as.matrix(combn(ncol(dat), 2))
    mm <- NULL
    dd <- NULL
    for (i in seq_len(ncol(pares))) {
        a <- dat[selec,pares[1,i]]
        b <- dat[selec,pares[2,i]]
        mm <- cbind(mm, log(a/b, 2))
        dd <- cbind(dd, (a-b))  
    }

    ## Correcting (M,D) 
    sd.M <- sqrt(param$sd[,1]^2/(dat[,1]^2*log(2)^2*param$n[1]) + 
        param$sd[,2]^2/(dat[,2]^2*log(2)^2*param$n[2]))

    sd.D = sqrt(param$sd[,1]^2/param$n[1] + param$sd[,2]^2/param$n[2])

    if(is.null(a0per)) {
        a0.M = a0.D = 0
    } 
    else {
        if (a0per == "B") {
            B <- 100
            a0.M <- B*max(sd.M,na.rm=TRUE)
            a0.D <- B*max(sd.D,na.rm=TRUE)
        }
        else {
            a0per <- as.numeric(a0per)
            a0.M <- quantile(sd.M, probs = a0per, na.rm = TRUE)
            a0.D <- quantile(sd.D, probs = a0per, na.rm = TRUE)
        }
    }

    mm <- mm / (a0.M + sd.M)
    dd <- dd / (a0.D + sd.D)

    # Results
    list("M"=mm,"D"=dd)
}

################################################################################

################################################################################

# noiseq.R

noiseq <- function (input,k=0.5,norm=c("rpkm","uqua","tmm","n"),
    replicates=c("technical","biological","no"),factor=NULL,conditions=NULL,
    pnr=0.2,nss=5,v=0.02,lc=0) {
    if (inherits(input,"eSet") == FALSE)
        stop("Error. You must give an object generated by the readData ",
            "function\n")

    if (is.null(factor))
        stop("Error. You must specify the factor to know which conditions ",
            "you wish to compare. Please,give the argument 'factor'.\n")

    replicates <- match.arg(replicates)
    if (replicates == "biological") 
        print("WARNING: Your experiment has biological replicates. You ",
            "should consider using NOISeqBIO instead of NOISeq.")
    norm <- match.arg(norm)
    
    # (M,D) for signal and noise
    print("Computing (M,D) values...")
    miMD <- allMD(input,factor,k=k,replicates=replicates,norm=norm,
        conditions=conditions,pnr=pnr,nss=nss,v=v,lc=lc)

    ## Probability of differential expression

    print("Computing probability of differential expression...")
    prob.concounts <- probdeg(miMD$Ms,miMD$Ds,miMD$Mn,miMD$Dn)$prob

    if (!is.null(assayData(input)$exprs))
        todos <- rownames(as.matrix(assayData(input)$exprs))
    else
        todos <- rownames(as.matrix(assayData(input)$counts))
    
    prob <- prob.concounts[todos]
    names(prob) <- todos

    ## Results
    resultat <- data.frame("level_1"=miMD$Level1,"level_2"=miMD$Level2,
        "M"=miMD$Ms,"D"=miMD$Ds,"prob"=prob)
    rownames(resultat) <- todos

    # We change the name of the conditions to "name_mean"
    colnames(resultat)[1] <- 
        paste(unlist(strsplit(miMD$comp," "))[1],"mean",sep="_")
    colnames(resultat)[2] <- 
        paste(unlist(strsplit(miMD$comp," "))[3],"mean",sep="_")

    resultat <- data.frame(resultat,"ranking"=ranking(resultat)$statistic)
    if (!is.null(featureData(input)@data$Length))
        resultat <- data.frame(resultat,
            "Length"=as.numeric(as.character(featureData(input)@data[todos,
            "Length"])),stringsAsFactors=FALSE)
    if (!is.null(featureData(input)@data$GC))
        resultat <- data.frame(resultat,"GC"=as.numeric(as.character(
            featureData(input)@data[todos,"GC"])),stringsAsFactors=FALSE)
    if (!is.null(featureData(input)@data$Chromosome))
        resultat <- data.frame(resultat,"Chrom"=as.character(
            featureData(input)@data$Chromosome),"GeneStart"=as.numeric(
            as.character(featureData(input)@data$GeneStart)),
            "GeneEnd"=as.numeric(as.character(featureData(input)@data$GeneEnd)),
            stringsAsFactors=FALSE)
    if (!is.null(featureData(input)@data$Biotype))
        resultat <- data.frame(resultat,"Biotype"=as.character(
            featureData(input)@data[todos,"Biotype"]),stringsAsFactors=FALSE)
    #resultat[order(resultat[,5],decreasing=TRUE),]
    
    Output(data=list(resultat),method=norm,k=miMD$k,lc=lc,factor=factor,v=v,
        nss=nss,pnr=pnr,comparison=miMD$comp,replicates=replicates)
}

################################################################################

################################################################################

# noiseqbio.R

noiseqbio <- function (input,k=0.5,norm=c("rpkm","uqua","tmm","n"),nclust=15,
    plot=FALSE,factor=NULL,conditions=NULL,lc=0,r=50,adj=1.5,a0per=0.9,filter=1,
    depth=NULL,cv.cutoff=500,cpm=1) {
    if (inherits(input,"eSet") == FALSE)
        stop("ERROR: You must give an object generated by the readData ",
            "function\n")

    if (is.null(factor))
        stop("ERROR: You must specify the factor to know which conditions ",
            "you wish to compare. Please,give the argument 'factor'.\n")

    if (min(table(input@phenoData@data[,factor])) < 2)
        stop("ERROR: To run NOISeqBIO at least two replicates per condition ",
            "are needed. Please, run NOISeq if there are not enough ",
            "replicates in your experiment.\n")

    norm <- match.arg(norm)

    # Z-scores for signal and noise
    cat("Computing Z values...\n")
    miMD <- allMDbio(input,factor,k=k,norm=norm,conditions=conditions,lc=lc,
        r=r,a0per=a0per,nclust=nclust,filter=filter,depth=depth,
        cv.cutoff=cv.cutoff,cpm=cpm)

    ##### Probability of differential expression
    cat("Computing probability of differential expression...\n")

    ## KDE estimators of f0 and f    
    desde <- min(c(miMD$Zs,miMD$Zn),na.rm=TRUE)
    hasta <- max(c(miMD$Zs,miMD$Zn),na.rm=TRUE)

    fdens <- density(miMD$Zs,adjust=adj,n=5000,from=desde,to=hasta,na.rm=TRUE)
    f <- approxfun(fdens)

    f0dens <- density(miMD$Zn,adjust=adj,n=5000,from=desde,to=hasta,na.rm=TRUE)
    f0 <- approxfun(f0dens)

    if (f0(0)/f(0) < 1) 
        print("WARNING: f0(0)/f(0) < 1 => FP with Z~0 will be detected.")

    f0.f <- f0(miMD$Zs)/f(miMD$Zs)

    ## f0,f plot
    if (plot) {
        plot(f0dens,lwd=2,col=4,main=paste("r=",r,"; a0per=",a0per,sep=""),
            xlab="theta")
        lines(fdens,lwd=2,col=1)
        legend("topleft",c("f","f0"),col=c(1,4),lwd=2)
    }

    ## ESTIMATION of p0
    p0 <- min(1/f0.f,na.rm=TRUE)
    p0=min(p0,1)

    cat(paste("p0 =",p0)); cat("\n")

    ## PROBABILITY of DIFFERENTIAL EXPRESSION
    myprob <- 1 - p0*f0.f
    if (min(myprob,na.rm=TRUE) < 0) {
        print(summary(f0.f))
    }
    names(myprob) <- names(miMD$Zs)

    cat("Probability\n")
    print(summary(myprob))

    if (!is.null(assayData(input)$exprs))
        todos <- rownames(as.matrix(assayData(input)$exprs))
    else
        todos <- rownames(as.matrix(assayData(input)$counts))

    myprob <- myprob[todos]
    names(myprob) <- todos

    ## Results
    resultat <- data.frame("level_1"=miMD$Level1,"level_2"=miMD$Level2,
        "theta"=miMD$Zs,"prob"=myprob,"log2FC"=log2(miMD$Level1/miMD$Level2))
    rownames(resultat) <- todos

    colnames(resultat)[1] <- 
        paste(unlist(strsplit(miMD$comp," "))[1],"mean",sep="_")
    colnames(resultat)[2] <- 
        paste(unlist(strsplit(miMD$comp," "))[3],"mean",sep="_")

    # resultat <- data.frame(resultat,"ranking"=ranking(resultat)$statistic)
    if (!is.null(featureData(input)@data$Length))
        resultat <- data.frame(resultat,"length"=as.numeric(as.character(
            featureData(input)@data[todos,"Length"])),stringsAsFactors=FALSE)
    if (!is.null(featureData(input)@data$GC))
        resultat <- data.frame(resultat,"GC"=as.numeric(as.character(
            featureData(input)@data[todos,"GC"])),stringsAsFactors=FALSE)
    if (!is.null(featureData(input)@data$Chromosome))
        resultat <- data.frame(resultat,"Chrom"=(as.character(
            featureData(input)@data$Chromosome)),
            "GeneStart"=as.numeric(as.character(featureData(
            input)@data$GeneStart)),"GeneEnd"=as.numeric(as.character(
            featureData(input)@data$GeneEnd)),stringsAsFactors=FALSE)

    if (!is.null(featureData(input)@data$Biotype))
        resultat <- data.frame(resultat,"Biotype"=as.character(
            featureData(input)@data[todos,"Biotype"]),stringsAsFactors=FALSE)
    
    Output(data=list(resultat),method=norm,k=miMD$k,lc=lc,factor=factor,
        comparison=miMD$comp,replicates="biological",v=0,nss=0,pnr=0)
}

################################################################################

################################################################################

# normalization.R

noitmm <- function (datos,long=1000,lc=0,k=0,refColumn=1,logratioTrim=.3,
    sumTrim=0.05,doWeighting=TRUE,Acutoff=-1e10) {
    if (!is.null(ncol(long))) {
        mynames <- long[,1]
        long <- long[,2]
        names(long) <- mynames
    }

    L <- (long/1000)^lc
    datos <- datos/L

    total <- colSums(as.matrix(datos))

    datos0 <- sinceros(datos,k)

    if (ncol(as.matrix(datos)) > 1) {
        fk <- .calcNormFactors(as.matrix(datos),refColumn=refColumn,
            method="TMM",logratioTrim=logratioTrim,sumTrim=sumTrim,
            doWeighting=doWeighting,Acutoff=Acutoff)

        fk <- fk * (total/mean(total))
        datos.norm <- t(t(datos0)/fk)
    } 
    else {
        datos.norm <- datos0/L
    }
    na.omit(datos.norm)    
    
}

noirpkm <- function (datos,long=1000,lc=1,k=0) {
    if (!is.null(ncol(long))) {
        mynames=long[,1]
        long=long[,2]
        names(long)=mynames
    }

    total <- colSums(as.matrix(datos))
    datos0 <- sinceros(datos,k)                                     
    datos.norm <- (t(t(datos0)/total)*10^6)/((long/1000)^lc)
    na.omit(datos.norm)     
}

noiuqua <- function (datos,long=1000,lc=0,k=0) {
    if (!is.null(ncol(long))) {
        mynames=long[,1]
        long=long[,2]
        names(long)=mynames
    }

    L <- (long/1000)^lc
    datos=datos/L

    datos0 <- sinceros(datos,k)

    if (ncol(as.matrix(datos)) > 1) {
        sumatot <- rowSums(datos)
        supertot <- sum(sumatot)
        counts0 <- which(sumatot == 0)

        if (length(counts0) > 0) {
            datitos <- datos[-counts0,]
        } else {
            datitos <- datos
        }

        q3 <- apply(datitos,2,quantile,probs=0.75)
        d <- q3*supertot/sum(q3)

        datos.norm <- t(t(datos0)/d)*10^6

    } else {

        datos.norm <- datos0/L

    }
    
    na.omit(datos.norm)
}

## Taken from the edgeR package with minor modifications
.calcNormFactors <- function(object,method=c("TMM","quantile"),refColumn=NULL,
    logratioTrim=.3,sumTrim=0.05,doWeighting=TRUE,Acutoff=-1e10,quantile=0.75) {
    method <- match.arg(method)
    if( is.matrix(object) ) {
        if(is.null(refColumn))
            refColumn <- 1
        data <- object
        libsize <- colSums(data)
    } 
    else {
        stop(".calcNormFactors() only operates on 'matrix' objects")
    }

    f <- switch(method,
        TMM = apply(data,2,.calcFactorWeighted,ref=data[,refColumn],
            logratioTrim=logratioTrim,sumTrim=sumTrim,doWeighting=doWeighting,
            Acutoff=Acutoff),quantile=.calcFactorQuantile(data,libsize,
            q=quantile))
    f <- f/exp(mean(log(f)))

    return(f)
}

.calcFactorQuantile <- function(data,lib.size,q=0.75) {
    y <- t(t(data)/lib.size)
    f <- apply(y,2,function(x) quantile(x,p=q))
    f/exp(mean(log(f)))
}

.calcFactorWeighted <- function(obs,ref,logratioTrim=.3,sumTrim=0.05,
    doWeighting=TRUE,Acutoff=-1e10) {
    if( all(obs==ref) )
        return(1)

    obs <- as.numeric(obs)
    ref <- as.numeric(ref)

    nO <- sum(obs)
    nR <- sum(ref)
    # log ratio of expression,accounting for library size
    logR <- log2((obs/nO)/(ref/nR)) 
    absE <- (log2(obs/nO) + log2(ref/nR))/2 # absolute expression
    v <- (nO-obs)/nO/obs + (nR-ref)/nR/ref # estimated asymptotic variance

    # remove infinite values,cutoff based on A
    fin <- is.finite(logR) & is.finite(absE) & (absE > Acutoff)

    logR <- logR[fin]
    absE <- absE[fin]
    v <- v[fin]

    # taken from the original mean() function
    n <- sum(fin)
    loL <- floor(n * logratioTrim) + 1
    hiL <- n + 1 - loL
    loS <- floor(n * sumTrim) + 1
    hiS <- n + 1 - loS
    
    #keep <- (rank(logR) %in% loL:hiL) & (rank(absE) %in% loS:hiS)
    # a fix from leonardo ivan almonacid cardenas,since rank() can return
    # non-integer values when there are a lot of ties
    keep <- (rank(logR)>=loL & rank(logR)<=hiL) & (rank(absE)>=loS 
        & rank(absE)<=hiS)
    
    if (doWeighting) 
        2^(sum(logR[keep]/v[keep],na.rm=TRUE) / sum(1/v[keep],na.rm=TRUE))
    else
        2^(mean(logR[keep],na.rm=TRUE))
}

################################################################################

################################################################################

# PCA.GENES.R

PCA.GENES <- function(X) {
    n <- ncol(X)
    p <- nrow(X)
    offset <- apply(X,2,mean)
    Xoff <- X-(cbind(matrix(1,p,1))%*%rbind(offset))

    eigen <- eigen(Xoff%*%t(Xoff)/(p-1))
    var <- cbind(eigen$values/sum(eigen$values),
        cumsum(eigen$values/sum(eigen$values)))

    loadings2 <- eigen$vectors
    scores2 <- t(Xoff)%*%loadings2

    normas2 <- sqrt(apply(scores2^2,2,sum))

    scores1 <- loadings2%*%diag(normas2)
    loadings1 <- scores2%*%diag(1/normas2)

    output <- list(eigen,var,scores1,loadings1)
    names(output) <- c("eigen","var.exp","scores","loadings")
    output
}

################################################################################

################################################################################

# probdeg.R

probdeg <- function (Mg,Dg,Mn,Dn,prec=2) {
    tot <- length(Mn)   # number of points in noise distribution
    gens <- names(Mg)
    Mruido <- abs(round(Mn,prec))
    Druido <- round(Dn,prec)
    Mgen <- abs(round(Mg,prec))
    Dgen <- round(Dg,prec)
    MDgen <- na.omit(cbind(Mgen, Dgen))
    MDunic <- unique(MDgen)
    Nres <- apply(MDunic,1,n.menor,S1=Mruido,S2=Druido)
    lugares <- apply(MDgen,1,busca,S=MDunic)
    Nconj <- Nres[lugares]
    names(Nconj) <- names(lugares)
    laprob <- Nconj / tot
    laprob <- laprob[gens]
    names(laprob) <- gens
    Nconj <- Nconj[gens]
    names(Nconj) <- gens
    laprob <- list("prob"=laprob,"numDE"=Nconj,"numNOISE"=tot)
    laprob
}

################################################################################

################################################################################

# readData.R

readData <- function(data=NULL,factors=NULL,length=NULL,biotype=NULL,
    chromosome=NULL,gc=NULL) {
    if (is.null(data))
        stop("Expression information must be provided to the readData function")

    if (is.null(factors))
        stop("Condition information must be provided to the readData funcion")

    if (!is.null(length) && !is.vector(length) && !is.data.frame(length)
        && !is.matrix(length))
        stop( "The length info should be a vector or a data.frame/matrix.")

    if (!is.null(gc) && !is.vector(gc) && !is.data.frame(gc) && !is.matrix(gc))
        stop( "The GC content info should be a vector or a data.frame/matrix.")
    
    if (!is.null(chromosome) && ncol(chromosome) != 3)
        stop( "The chromosome object should be a matrix or data.frame with ",
            "3 columns: chromosome,start position and end position.")

    if (!is.null(biotype) && !is.vector(biotype) && !is.data.frame(biotype) 
        && !is.matrix(biotype))
        stop( "The biotype info should be a vector or a data.frame/matrix.")

    countData <- as.matrix(data)

    rowNames <- rownames(countData)
    
    if (nrow(factors) == ncol(countData)) {
        rownames(factors) <- colnames(countData)
    }
    else {
        stop ("Number of rows in factors must be equal to number of ",
            "columns in data.\n")
    }

    pheno <- AnnotatedDataFrame(data=as.data.frame(factors))
    input <- ExpressionSet(assayData=countData,phenoData=pheno)

    if (!is.null(length))
        input <- addData(data=input,length=length)

    if (!is.null(gc))
        input <- addData(data=input,gc=gc)

    if (!is.null(biotype))
        input <- addData(data=input,biotype=biotype)

    if (!is.null(chromosome))
        input <- addData(data=input,chromosome=chromosome)

    input
}

addData <- function(data,length=NULL,biotype=NULL,chromosome=NULL,factors=NULL,
    gc=NULL) {
    if (!inherits(data,"eSet"))
        stop("Error. You must give an eSet object.")

    if (!is.null(length) && !is.vector(length) && !is.data.frame(length) 
        && !is.matrix(length))
        stop( "The length info should be a vector or a data.frame/matrix.")

    if (!is.null(gc) && !is.vector(gc) && !is.data.frame(gc) && !is.matrix(gc))
        stop( "The GC content info should be a vector or a data.frame/matrix.")

    if (!is.null(biotype) && !is.vector(biotype) && !is.data.frame(biotype) 
        && !is.matrix(biotype))
        stop( "The biotype info should be a vector or a data.frame/matrix.")
    
    if (!is.null(chromosome) && ncol(chromosome) != 3)
        stop( "The chromosome object should be a matrix or data.frame with ",
            "3 columns: chromosome,start position and end position.")

    if (!is.null(assayData(data)$exprs))
        rowNames <- rownames(assayData(data)$exprs)
    else
        rowNames <- rownames(assayData(data)$counts)

    # If exists length
    if (!is.null(length)) {
        Length <- rep(NA,length(rowNames))
        names(Length) <- rowNames
        if (is.vector(length)) {
            Length[rowNames] <- as.numeric(as.character(length[rowNames]))
        } 
        else if (is.data.frame(length) || is.matrix(length)) {
            if (ncol(length) == 2) {
                # We assume that the feature names are in the first column and 
                # the length in the second
                rownames(length) <- length[,1]
                Length[rowNames] <- as.numeric(as.character(length[rowNames,2]))
            } 
            else if (ncol(length) == 1) {
                # We assume that the length are in the first column and the 
                # feature names in the rownames
                Length[rowNames] <- as.numeric(as.character( 
                    length[rowNames,1]))
            } 
            else {
                stop( "The length matrix/data.frame contains more columns ",
                    "than expected.")
            }
        }
        featureData(data)@data <- cbind(featureData(data)@data,Length)
    }

    # If exists gc
    if (!is.null(gc)) {
        GC <- rep(NA,length(rowNames))
        names(GC) <- rowNames
        if (is.vector(gc)) {
            GC[rowNames] <- as.numeric(as.character(gc[rowNames]))
        } 
        else if (is.data.frame(gc) || is.matrix(gc)) {
            if (ncol(gc) == 2) {
                # We assume that the feature names are in the first column and 
                # the GC content in the second
                rownames(gc) <- gc[,1]
                GC[rowNames] <- as.numeric(as.character(gc[rowNames,2]))
            } 
            else if (ncol(gc) == 1) {
                # We assume that the GC contents are in the first column 
                # and the feature names in the rownames
                GC[rowNames] <- as.numeric(as.character(gc[rowNames,1]))
            } 
            else {
                stop( "The GC matrix/data.frame contains more columns ",
                    "than expected.")
            }
        }
        featureData(data)@data <- cbind(featureData(data)@data,GC)
    }
    
    # If exists biotype
    if (!is.null(biotype)) {
        Biotype <- rep(NA,length(rowNames))
        names(Biotype) <- rowNames
        if (is.vector(biotype)) {
            Biotype[rowNames] <- as.character(biotype[rowNames])
        } 
        else if (is.data.frame(biotype) || is.matrix(biotype)) {
            if (ncol(biotype) == 2) {
                # We assume that the feature names are in the first column and 
                # the biotypes in the second
                rownames(biotype) <- biotype[,1]
                Biotype[rowNames] <- as.character( biotype[rowNames,2])
            } 
            else if (ncol(biotype) == 1) {
                # We assume that the biotypes are in the first column and 
                # the feature names in the rownames
                Biotype[rowNames] <- as.character( biotype[rowNames,1]    )
            } 
            else {
                stop("The biotype matrix/data.frame contains more ",
                    "columns than expected.")
            }
        }        
        featureData(data)@data <- cbind(featureData(data)@data,Biotype)
        featureData(data)@data$Biotype <- 
            as.character(featureData(data)@data$Biotype)
    }
    
    # If exists chromosome
    if (!is.null(chromosome)) {
        Chromosome <- GeneStart <- GeneEnd <- rep(NA,length(rowNames))
        names(Chromosome) <- names(GeneStart) <- names(GeneEnd) <- rowNames
        Chromosome[rowNames] <- as.character(chromosome[rowNames,1])
        GeneStart[rowNames] <- as.numeric(as.character(chromosome[rowNames,2]))
        GeneEnd[rowNames] <- as.numeric(as.character(chromosome[rowNames,3]))
        featureData(data)@data <- cbind(featureData(data)@data,Chromosome,
            GeneStart,GeneEnd)
    }

    # If exists new factors
    if (!is.null(factors))
        phenoData(data)@data <- cbind(phenoData(data)@data,factors)
    
    data
}

################################################################################

################################################################################

# saturation.plot.R

#### SATURATION PLOTS

## Data for saturation plot (with or without biotypes)

saturation.dat <- function (input,k=0,biotypes=NULL,ndepth=6) {
    if (inherits(input,"eSet") == FALSE)
        stop("Error. You must give an eSet object\n")

    if (!is.null(assayData(input)$exprs))
        datos <- assayData(input)$exprs
    else
        datos <- assayData(input)$counts
    
    if (!is.null(featureData(input)$Biotype))
        infobio <- featureData(input)$Biotype        
    else
        infobio <- NULL

    nsam <- NCOL(datos)

    if (!is.null(infobio)) {        
        if(is.null(biotypes)) {
            biotypes <- unique(infobio)
            names(biotypes) <- biotypes                 
        }     
    } else { biotypes=NULL }     

    satura <- vector("list",length=length(biotypes)+1)
    names(satura) <- c("global",names(biotypes))

    ndepth1 <- ceiling(ndepth/2)

    datos <- round(datos,0)
    datos0 <- datos + 0.2
    
    # Random subsamples for each sample
    submuestras <- seq.depth <- vector("list",length=nsam)
    names(submuestras) <- names(seq.depth) <- colnames(datos)

    for (n in seq_len(nsam)) { # simulating subsamples for each sample
        total <- sum(datos[,n]) # total counts in sample n
        # simulation for each depth and real depth
        varias <- vector("list",length=ndepth+1)

        for (i in seq_len(ndepth1)) { # simulating depths < real depth
            muestra <- rmultinom(10,size=round(total/(ndepth1+1),0),
                prob=datos[,n])
            if (i == 1)
                varias[[i]] <- muestra 
            else
                varias[[i]] <- varias[[i-1]] + muestra
        }

        varias[[ndepth1+1]] <- as.matrix(datos[,n])

        for (i in (ndepth1+2):(ndepth+1)) { # simulating depths < real depth
            muestra <- rmultinom(10,size=round(total/(ndepth1+1),0),
                prob=datos0[,n])
            if (i == ndepth1+2)
                varias[[i]] <- matrix(varias[[i-1]],ncol=10,
                    nrow=nrow(varias[[i-1]])) + muestra
            else
                varias[[i]] <- varias[[i-1]] + muestra
        }

        submuestras[[n]] <- varias
        seq.depth[[n]] <- c(round(total/(ndepth1+1),0)*seq_len(ndepth1),total,
            round(total/(ndepth1+1),0)*((ndepth1+2):(ndepth+1)))
    }

    # Global saturation
    satura[[1]] <- vector("list",length=nsam)
    names(satura[[1]]) <- colnames(datos)

    for (n in seq_len(nsam)) { # for each sample
        satura[[1]][[n]] <- vapply(submuestras[[n]],function(x) { 
            mean(apply(x,2,noceros,k=k)) 
        },numeric(1))
    }

    # Per biotypes
    if (!is.null(infobio)) { # if biotypes available
        biog <- lapply(biotypes,function(x) { 
            which(is.element(infobio,x)) 
        })
        names(biog) <- names(biotypes)
        for (j in 2:length(satura)) { # for each biotype
            satura[[j]] <- vector("list",length=nsam)
            names(satura[[j]]) <- colnames(datos)
            for (n in seq_len(nsam)) { # for each sample
                conbio <- lapply(submuestras[[n]],function(x) { 
                    as.matrix(x[biog[[j-1]],]) 
                })
                satura[[j]][[n]] <- vapply(conbio,function(x) { 
                    mean(apply(x,2,noceros,k=k)) 
                },numeric(1))
            }
        }
    } 
    else
        biog <- NULL

    # computing detection increasing per million reads
    newdet <- vector("list",length=1+length(biotypes))
    names(newdet) <- c("global",names(biotypes))
    
    for (j in seq_len(length(newdet))) {
        newdet[[j]] <- vector("list",length=nsam)
        names(newdet[[j]]) <- colnames(datos)
        for (n in seq_len(nsam)) {
            puntos <- data.frame("x"=seq.depth[[n]],"y"=satura[[j]][[n]])
            pendi <- NULL
            for(i in 2:nrow(puntos))                
                pendi <- c(pendi,
                    (puntos$y[i]-puntos$y[i-1])/(puntos$x[i]-puntos$x[i-1]))
            newdet[[j]][[n]] <- c(NA,pendi*1000000)
        }
    }

    bionum <- c(NROW(datos),vapply(biog,length,integer(1)))
    names(bionum) <- c("global",names(biog))
    
    # Results at real sequencing depth
    real <- vector("list",length=length(satura))
    names(real) <- names(satura)
    realdepth <- vapply(seq.depth,function (x) x[ndepth1+1],numeric(1))/10^6
    for (i in seq_len(length(real))) {
        real[[i]] <- data.frame("depth"=realdepth,
            "detec"=vapply(satura[[i]],function (x) x[ndepth1+1],numeric(1)))
        rownames(real[[i]]) <- colnames(datos)
    }
    
    # Results
    satura <- list("saturation"=satura,"bionum"=bionum,"depth"=seq.depth,
        "newdet"=newdet,"real"=real)
    
    satura
}

################################################################################

Try the metaseqR2 package in your browser

Any scripts or data that you put into this service are public.

metaseqR2 documentation built on Nov. 8, 2020, 7:34 p.m.