R/base.R

Defines functions pseudoBulk pseudoPlot f fdrControl logW logit check.prob

check.prob = function(P,min.zero=.Machine$double.xmin){
    # Function used to check whether probabilities are between 0 and 1
    P = pmax(pmin(P,1),0)
    if(sum(P==0)>0){P[P==0] = min.zero}
    return(P/rowSums(P))
}

logit <- function(x){exp(x)/(1+exp(x))}

logW <- function(logFB,theta,M,N,L){
    return(do.call(rbind,lapply(seq_len(N),function(x){
        z = do.call(rbind,lapply(seq_len(L),function(y){
            log(theta$delta[y]) + logFB[[y]][['logF']][M,,x]
        }))
        rmaxz = Biobase::rowMax(z)
        return((rmaxz + log(rowSums(exp(z-rmaxz)))) - (max(z) + log(sum(exp(z-max(z))))))
    })))
}

fdrControl <- function(prob,fdr){
    if(any(prob<0) | any(prob>1)){stop('Posterior probabilities must be between 0 and 1')}
    if((fdr<0) | (fdr>1)){stop('fdr must be between 0 and 1')}
    
    notpp = FDR = Window = NULL
    
    DT <- data.table::data.table(notpp = 1-prob,Window = seq_len(length(prob)),key = 'Window')
    return(DT[order(notpp),][,FDR := ((cumsum(notpp)/seq_len(nrow(DT)))<fdr)][order(Window),]$FDR)
}

f <- function(par,weights,size){
    # MULTINOMIAL LOG-LIKELIHOOD
    return(-sum(weights*dbinom(x = 0:size,size = size,prob = logit(par),log = T)))
}

pseudoPlot <- function(sc,mat,ranges,normalize = FALSE,ptssize = 1){
    # This function plots counts from bulk data (sc) and compares it to the pseudobulk (mat) for a given region (ranges)
    
    Data = Start = Counts = Cell = Cluster =  NewCell = V1 = NULL
    
    index <- IRanges::overlapsAny(sc,ranges)
    str <- ranges(SummarizedExperiment::rowRanges(sc[index,]))@start
    
    if(normalize){
        newsc <-  epigraHMM::createOffset(SummarizedExperiment::SummarizedExperiment(assays = list(counts = cbind(Matrix::rowSums(SummarizedExperiment::assay(sc)[index,]),
                                                                                                                  mat[index,])),
                                                                                     rowRanges = SummarizedExperiment::rowRanges(sc[index,]),
                                                                                     colData = data.frame(Data = c('Bulk',paste0('Pseudobulk',1:ncol(mat))))),type = 'loess',span = 1)
        SummarizedExperiment::assay(newsc,'counts') <- SummarizedExperiment::assay(newsc,'counts')/exp(SummarizedExperiment::assay(newsc,'offset'))
        
    } else{
        newsc <- SummarizedExperiment::SummarizedExperiment(assays = list(counts = cbind(Matrix::rowSums(SummarizedExperiment::assay(sc)[index,]),
                                                                                         mat[index,])),
                                                            rowRanges = SummarizedExperiment::rowRanges(sc[index,]),
                                                            colData = data.frame(Data = c('Bulk',paste0('Pseudobulk',1:ncol(mat)))))
    }
    
    dt <- rbindlist(lapply(seq_len(ncol(newsc)),function(x){
        data.table(Data = colData(newsc)$Data[x],Counts = SummarizedExperiment::assay(newsc,'counts')[,x], Start = str)
    }))
    
    fig1 <- ggplot2::ggplot(data = dt[Data == 'Bulk',],ggplot2::aes(x = Start,y = Counts))+
        ggplot2::facet_grid(rows = ggplot2::vars(Data))+
        ggplot2::geom_line()+
        ggplot2::theme_bw()+
        ggplot2::theme(axis.text = ggplot2::element_text(size = 6),axis.title = ggplot2::element_text(size = 6),
                       panel.grid.major = ggplot2::element_blank(),panel.grid.minor = ggplot2::element_blank(),strip.text = ggplot2::element_text(size = 6))+
        ggplot2::scale_x_continuous(labels = scales::scientific)+
        ggplot2::ylab(ifelse(normalize,'Normalized Counts','Counts'))+
        ggplot2::xlab(paste0('Genomic Window (',seqnames(ranges),')'))
    fig2 <- ggplot2::ggplot(data = dt[!(Data == 'Bulk'),],ggplot2::aes(x = Start,y = Counts))+
        ggplot2::facet_grid(rows = ggplot2::vars(Data))+
        ggplot2::geom_line()+
        ggplot2::theme_bw()+
        ggplot2::theme(axis.text = ggplot2::element_text(size = 6),axis.title = ggplot2::element_text(size = 6),
                       panel.grid.major = ggplot2::element_blank(),panel.grid.minor = ggplot2::element_blank(),strip.text = ggplot2::element_text(size = 6))+
        ggplot2::scale_x_continuous(labels = scales::scientific)+
        ggplot2::ylab(ifelse(normalize,'Normalized Counts','Counts'))+
        ggplot2::xlab(paste0('Genomic Window (',seqnames(ranges),')'))
    
    subsc <- sc[IRanges::overlapsAny(sc,ranges),]
    subdt <- rbindlist(lapply(unique(sc$Cluster),function(x){
        ctd.mat <- summary(SummarizedExperiment::assay(subsc[,colData(subsc)$Cluster==x],'counts'))
        
        ctd <- data.table(Cluster = paste0('Cluster',x), Cell = ctd.mat$j, Start = str[ctd.mat$i],Counts = ctd.mat$x)
        
        ctd[,Counts := ifelse(Counts==1,'1',ifelse(Counts==2,'2','>2'))]
        ctd$Counts <- factor(ctd$Counts,levels = c('1','2','>2'))
        return(ctd)
    }))
    subdt <- subdt[order(Start,Cell)]
    
    fig3 <- ggplot2::ggplot(data = subdt,ggplot2::aes(y = Cell,x = Start, color= Counts))+
        ggplot2::facet_grid(rows = ggplot2::vars(Cluster),scales = 'free_y')+
        ggplot2::geom_point(alpha = 0.3,shape=20,size = ptssize)+
        ggplot2::theme_bw()+
        ggplot2::theme(panel.grid.major = ggplot2::element_blank(),
                       panel.grid.minor = ggplot2::element_blank(),
                       legend.position = 'top',legend.direction = 'horizontal',
                       #legend.box.background = ggplot2::element_rect(colour = "black"),
                       legend.title = ggplot2::element_text(size = 8),
                       legend.text  = ggplot2::element_text(size = 8),
                       axis.text = ggplot2::element_text(size = 6),axis.title = ggplot2::element_text(size = 6),strip.text = ggplot2::element_text(size = 6),
                       legend.key.size = ggplot2::unit(0.1, "lines"))+
        ggplot2::scale_color_manual(values = c('black','red','purple'))+
        ggplot2::scale_x_continuous(labels = scales::scientific)+
        ggplot2::xlab(paste0('Genomic Window (',seqnames(ranges),')'))+
        ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size = 2,alpha = 1)))
    
    fig <- ggpubr::ggarrange(fig1,fig2,fig3,nrow = 1,ncol = 3,widths = c(1.875,1.875,2.25)/6,common.legend = T)
    
    return(fig)
}

pseudoBulk <- function(sc,group,type){
    # This function calculate pseudobulk on rows or columns
    
    if(type == 'rows'){
        if(!length(group)==nrow(sc)){stop('dimensions not compatible')}
        
        mat <- t(t(SummarizedExperiment::assay(sc)) %*% Matrix::sparse.model.matrix(~0+group))
        
    } else{
        if(type == 'cols'){
            if(!length(group)==ncol(sc)){stop('dimensions not compatible')}
            
            mat <- SummarizedExperiment::assay(sc) %*% Matrix::sparse.model.matrix(~0+group)
            
        } else{
            stop('type not valid')
        }
    }
    return(mat)
}
plbaldoni/mikado documentation built on June 9, 2020, 3:34 p.m.