R/diffGenes.R
In chroGPS: chroGPS2: Generation, visualization and differential analysis of epigenome maps

Documented in combineGenesMatrix diffGenes find.fdr find.threshold plotDiffGenes

## Functions to perform differential gene maps analysis

## ## Merge replicates (columns)
## mergeReplicates <- function(x,id,mergeBy='any')
##     {
##         if (is.numeric(mergeBy))
##             if (!(mergeBy<=1 & mergeBy>0)) stop('mergeBy has to be either any, all, or a number between (0,1]')
##         if (is.character(mergeBy) & !(mergeBy %in% c('any','all'))) stop('mergeBy has to be either any, all, or a number between (0,1]')

##         if (ncol(x) != length(id)) stop('Non-conformable data / id')

##         ## Collapse with selected policy
##         x <- do.call(cbind,lapply(sort(unique(id)),function(i) rowMeans(as.data.frame(x[,id==i]))))
##         if (mergeBy=='any') x[x>0] <- 1
##         if (mergeBy=='all') x[x<1] <- 0
##         if (mergeBy>0 & mergeBy<=1) x[x<mergeBy] <- 0
##         colnames(x) <- sort(unique(id))
##         return(x)
##     }

## distGPS extensions to generate differential maps
## x1, x2 are tables of genesxfactors, with no repeated row or colnames (ie one column per factor, unified by whatever criteria you like before calling this)
combineGenesMatrix <- function(x,y,label.x,label.y,minFactors=10,minGenes=1000)
    ## stacks together genes from x and y, using common factors and removing genes without marks in those
    {
        ## Common factors
        selfactors <- sort(intersect(colnames(x),colnames(y)))
        if (length(selfactors)<minFactors) stop('Not enough common factors between both datasets')
        ## Genes with marks for common factors in both them
        x.genes <- rownames(x)[rowSums(x[,selfactors])>0]
        y.genes <- rownames(y)[rowSums(y[,selfactors])>0]
        allgenes <- sort(intersect(x.genes,y.genes))
        if (length(allgenes)<minGenes) stop('Not enough common genes with information in both datasets')
        ans <- rbind(x[allgenes,selfactors],y[allgenes,selfactors])
        rownames(ans) <- c(paste(allgenes,label.x,sep='.'),paste(allgenes,label.y,sep='.'))
        ans
    }

diffGenes <- function(xy,m,clus,label.x,label.y,clusName=NULL,fdr=TRUE,mc.cores=1)
    ## retrieves cluster ID information for genes in x and y, with cluster id and pp per gene
    ## returns nice table with gene geneid, map position, cluster id and pp for X and Y
    {
        label.xx <- paste('.',label.x,sep='')
        label.yy <- paste('.',label.y,sep='')
        geneid <- as.character(rownames(xy)) # nice geneids without background identifiers
        geneid <- gsub(label.yy,'',gsub(label.xx,'',geneid))
        if (is.null(clusName)) { clusName <- clusNames(clus)[1] }
        uid <- NULL # to make it visible to the cmd check
        txt <- paste("uid <- paste(",paste("xy[,",1:ncol(xy),"]",collapse=","),",sep=',')",sep="")
        eval(parse(text=txt))
        coords <- m@points[uid,]
        clusid <- clus@clus[[clusName]]$id
        propclass <- unlist(parallel::mclapply(names(clusid),function(p) clus@clus[[clusName]]$propclass[[clusid[p]]][p],mc.cores=mc.cores))
        ans <- data.frame(type=c(rep(label.x,nrow(xy)/2),rep(label.y,nrow(xy)/2)),geneid=geneid,uid=uid,X=coords[uid,1],Y=coords[uid,2],ClusID=clusid[uid],ProbClus=propclass[uid])
        sel <- ans$type==label.x
        ans1 <- ans[sel,-1]; 
        ans2 <- ans[!sel,-1:-2]; 
        ## Add FDR if wanted
        if (fdr)
            {
                ans1 <- ans1[order(ans1$ProbClus),]
                ans1$FDR <- find.fdr(ans1$ProbClus,ans1$ProbClus)$fdr
                ans2 <- ans2[order(ans2$ProbClus),]
                ans2$FDR <- find.fdr(ans2$ProbClus,ans2$ProbClus)$fdr
            }
        colnames(ans1)[-1] <- paste(colnames(ans1)[-1],label.x,sep='.')
        colnames(ans2) <- paste(colnames(ans2),label.y,sep='.')
        #ClusIDs <- paste(ans1$ClusID.X,ans2$ClusID.Y,sep='.')
        #cbind(ans1,ans2,ClusIDs)
        cbind(ans1,ans2)
    }

# Add functions to cut PDE using a given FDR threshold (MDA find.threshold)
# I also use this function to compute FDRs for each individual pp
find.fdr <- function(v,threshold) {
    # Input
    #   v: vector with posterior probabilities of differential expression (missing values are removed)
    #   k: vector with thresholds to declare differential expression (defaults to a sequence between min(v),max(v))
    # Output: a list containing the following elements
    #   threshold: the k vector
    #   fdr: estimated FDR
    #   fnr: estimated FNR

    if (missing(v)) stop('The vector of posterior probabilities v must be specified')
    if (sum(is.na(v))>0) v <- v[!is.na(v)]
    if (missing(threshold)) threshold <- seq(min(v),max(v)-diff(range(v))/100,diff(range(v))/100)

    fdr <- fnr <- rep(NA,length(threshold))
    for (i in 1:length(threshold)) {
        fdr[i] <- sum((v>=threshold[i])*(1-v))/sum(v>=threshold[i])
        fnr[i] <- sum((v<threshold[i])*v)/sum(v<threshold[i])
    }
    
    return(list(threshold=threshold,fdr=fdr,fnr=fnr))
}

find.threshold <- function(pde,fdr,eps=.0001) {
    # Input
    #   pde: vector of probabilities of differential expression
    #   fdr: desired FDR
    # Output:
    #   threshold: threshold for probability of differential expression with estimated FDR closest to 'fdr'
    #   fdrest: estimated FDR

    pde <- pde[order(pde)]
    imin <- 1; imax <- length(pde); i <- round(imax/2)
    fdrest <- find.fdr(pde,pde[i])$fdr
    while ((imax-imin>1) & (abs(fdr-fdrest)>eps)) {
        if (fdrest>fdr) { imin <- i } else { imax <- i }
        i <- round(.5*(imin+imax))
        fdrest <- find.fdr(pde,pde[i])$fdr
        print(fdrest)
      }
    return(list(threshold=pde[i],fdrest=fdrest))

}

# Compute cluster centroids
#Compute and plot cluster centroids and plot cluster number
# m is mds object
# clus is clusGPS object
# clusName for cluster configuration in case more than one is within object
plotCenters <- function(m,clus,clusName=NULL,col='#00000090',bg='#FFFFFF90',lwd=4,pch=21,cex=5,text.cex=2,text.col='black',plot=TRUE,...)
{
    if (is.null(clusName)) { clusName <- clusNames(clus)[1]; warning('More than one cluster configuration available, used first element') }
    m@points <- m@points[names(clus@clus[[clusName]]$id),] # ensure same order of elements is used
    centers <- do.call(rbind,by(m@points,clus@clus[[clusName]]$id,colMeans)) # Compute mean of coordinates for every cluster
    if (plot)
        {
            points(centers,pch=pch,cex=cex,col=col,bg=bg,lwd=lwd)
            text(x=centers[,1],y=centers[,2],1:nrow(centers),cex=2,col='black')
        }
    centers
}

plotTransitions <- function(x,m,clus,res,plotCentroids=TRUE,transID,xlim,ylim,...)
    {
        # split data from each background
        fdrpos <- length(grep('FDR',colnames(res)))
        if (fdrpos==0)
            {
                x1 <- res[,1:6]
                x2 <- res[,c(1,7:11)]
            }
        else if (fdrpos==2)
            {
                x1 <- res[,1:7]
                x2 <- res[,c(1,8:13)]
            }
        else stop('Invalid Differential results table')

        ## check
        rownames(x1) <- x1$geneid
        rownames(x2) <- x2$geneid
        
        ## Select genes and plot
        transitions <- res$cc
        genes2plot <- as.character(res$geneid[transitions==transID])
        points(x1[genes2plot,c(3,4)],pch=19,col='blue',xlim=xlim,ylim=ylim)
        points(x2[genes2plot,c(3,4)],pch=19,col='red',xlim=xlim,ylim=ylim)
        segments(x1[genes2plot,3],x1[genes2plot,4],x2[genes2plot,3],x2[genes2plot,4],col=rgb(0,1,0,.35),lwd=2)
        if (plotCentroids & length(genes2plot)>=5)
            {
                points(ellipse::ellipse(x=cov(x=x1[genes2plot,c(3,4)]),centre=colMeans(x1[genes2plot,c(3,4)]),level=.5),type='l',xlim=xlim,ylim=ylim,col='blue',lwd=4)
                points(ellipse(x=cov(x=x2[genes2plot,c(3,4)]),centre=colMeans(x2[genes2plot,c(3,4)]),level=.5),type='l',xlim=xlim,ylim=ylim,col='red',lwd=4)
            }
    }

## Add functions to plot these results...
## x: combined GenesxFactors matrix as output from diffGPS
## m: an MDS obtained from x via distGPS and mds
## clus: a clustering from x and m, obtained via clusGPS
## res: a differential analysis result obtained via diffGPS.clus
plotDiffGenes <- function(x,m,clus,res,label.x='1',label.y='2',clusName=NULL,transitions=NULL,plotIDs=TRUE,fdr1=0.05,fdr2=0.05,minGenes=1,plotCentroids=TRUE,centroid.lwd=2,centroid.lty=1,point.col='lightgrey',probContour=0.75,contour.lwd=2,contour.lty=2,dolegend=TRUE,poslegend='bottomright',xlim,ylim,...)
    {
        ## checks
        if (is.null(clusName)) clusName <- clusNames(clus)[1]
        if (missing(xlim)) xlim <- range(m@points)
        if (missing(ylim)) ylim <- range(m@points)

        ## Generate transition table and prefilter by FDR if desired
        fdr <- res[,grep('FDR',colnames(res))]
        if (!ncol(fdr) %in% c(0,2)) stop('Invalid Differential results table')
        if (ncol(fdr)==2) res <- res[fdr[,1]<fdr1 & fdr[,2]<fdr2,]
        res$cc <- apply(res[,grep('ClusID',colnames(res))],1,paste,collapse='.')

        # Plot selected transitions
        if (is.null(transitions)) transitions <- sort(unique(res$cc))
        for (mycc in transitions)
            {
                ## Main map with cluster contours and IDs
                plot(m,point.col=point.col,xlim=xlim,ylim=ylim,...)
                plot(clus,type='contours',k=clusName,lwd=contour.lwd,probContour=probContour,lty=contour.lty,xlim=xlim,ylim=ylim,...)
                ## Plot Genes changing clusters (plotTransitions)
                if (sum(res$cc==mycc)>=minGenes) plotTransitions(x,m,clus,res,plotCentroids=TRUE,transID=mycc,xlim=xlim,ylim=ylim)
                ## Legend with cluster size
                if (dolegend) {
                    #tclus <- table(clusterID(clus,clusName))
                    #names(tclus) <- clusID
                    tclus <- tabClusters(clus,clusName)
                    legend(poslegend,legend=paste(names(tclus),as.numeric(tclus),sep=':'),col=rainbow(length(tclus)),pch=19,bty='n',cex=1,title='Cluster: # of elements')
                    }
                ## Compute cluster centers and plot cluster number
                #clusID <- 1:max(clusterID(clus,clusName))
                plotCenters(m,clus,clusName=clusName)
                ## Final legend
                legend('topleft',legend=sprintf('r2=%.3f / stress=%.3f',m@R.square,m@stress))
                legend('topright',legend=sprintf('Cluster Transition: %s\nFDR=%.2f/%.2f (n=%d)',mycc,fdr1,fdr2,sum(res$cc==mycc)))
                legend('bottomleft',col=c('blue','red'),lwd=2,legend=c(label.x,label.y))
                ##legend('bottomleft',col=c('blue','red'),lwd=2,legend=c('S2','BG3'),title=sprintf('elliptic hull (50%%)\n(S2 genes lost %d (%.2f %%))',table(genes.clus$u)[as.character(cc)],loseprop[as.character(cc)]*100),bty='n')
            }
    }

enrichedGO.plot <- function()
    {
        ## Plot GO enrichments
        #sel <- goenrich$S2.BG3.Clus==cc
        #if (sum(sel)>0)
        #    {
        #        par(new=TRUE)
        #        bp <- barplot(goenrich$pp[sel],horiz=TRUE,main=sprintf('Enriched GO terms in cluster transition %s',cc),col='white',xlim=c(-8,6),axes=FALSE,border='darkgrey')
        #        abline(v=-log10(c(0.1,0.01,0.001,0.0001,0.000001)),col='lightgrey')
        #        abline(v=-log10(0.05),col='red')
        #        text(x=.1,y=bp,goenrich$name[sel],cex=.75,adj=0)
        #        axis(3,at=1:6,cex.axis=.5)
        #    }
        #else text(x=.7,y=mean(range(m@points)),'No significant enrichments found',cex=1.5,adj=0)
    }

Any scripts or data that you put into this service are public.

chroGPS documentation built on Oct. 31, 2019, 4:52 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

chroGPS
chroGPS2: Generation, visualization and differential analysis of epigenome maps

R/diffGenes.R
In chroGPS: chroGPS2: Generation, visualization and differential analysis of epigenome maps

Defines functions combineGenesMatrix diffGenes find.fdr find.threshold plotCenters plotTransitions plotDiffGenes enrichedGO.plot

Documented in combineGenesMatrix diffGenes find.fdr find.threshold plotDiffGenes

Try the chroGPS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

chroGPS chroGPS2: Generation, visualization and differential analysis of epigenome maps

R/diffGenes.R In chroGPS: chroGPS2: Generation, visualization and differential analysis of epigenome maps

Defines functions combineGenesMatrix diffGenes find.fdr find.threshold plotCenters plotTransitions plotDiffGenes enrichedGO.plot

Documented in combineGenesMatrix diffGenes find.fdr find.threshold plotDiffGenes

Try the chroGPS package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

chroGPS
chroGPS2: Generation, visualization and differential analysis of epigenome maps

R/diffGenes.R
In chroGPS: chroGPS2: Generation, visualization and differential analysis of epigenome maps