R/map.patchseq.r

Defines functions map.patchseq

Documented in map.patchseq

#' Map PatchSeq
#'
#' Takes celltype specificity data ('ctd') and patchseq expression data ('patchdata') and returns which cell type each patchseq cell maps onto
#'
#' @param ctd Cell type specificity data as generated by EWCE
#' @param patchdata Gene by cell matrix of patchseq data
#' @param sdThresh Genes from the patchseq data will only be kept if they have a standard devation across the dataset larger than this figure
#' @param annotLevel Annotation level of ctd to use
#' @param useSDthresh Should standard deviation or total reads be used to determine which patchseq genes to keep?
#' @param reps How many bootstrap replicates to use. Ideally 10000.
#'
#' @return A list with assigned and fullRes
#'
#' @examples
#' map.patchseq(ctd,patchdata_Reduced,sdThresh=1,annotLevel=2,useSDthresh=TRUE,reps=10000)
#'
#' @export
map.patchseq <- function(ctd,patchdata,sdThresh=1,annotLevel=2,useSDthresh=TRUE,reps=10000){
    # Reduce the number of genes in the PatchSeq data to use for classification
    if(useSDthresh){
        # ... Either use the most variable (sd>1)
        patchdata_Reduced = patchdata[apply(patchdata,1,sd)>sdThresh,]
    }else{
        # ... Or those with over 200 reads across all the cells
        patchdata_Reduced = patchdata[apply(patchdata,1,sum)>200,]
    }
    
    # 'assigned' will store the celltype assigned to each patchseq cell
    assigned = rep("",dim(patchdata_Reduced)[2])
    
    # Find all genes shared between the patchseq dataset and the celltype specificity dataset
    bg_mouse=rownames(patchdata_Reduced)[rownames(patchdata_Reduced) %in% rownames(ctd[[annotLevel]]$specificity)]
    
    # For each cell in the patchseq data...
    for(i in 1:dim(patchdata_Reduced)[2]){
        # Which genes are expressed in patchseq cell i? (with at least one read)
        genesInCell = rownames(patchdata_Reduced)[patchdata_Reduced[,i]>1]
        
        hits=genesInCell[genesInCell %in% bg_mouse]
        whichCT = rep(1,length(colnames(ctd[[annotLevel]]$specificity)))
        names(whichCT) = colnames(ctd[[annotLevel]]$specificity)
        # Compare the Patchseq cell to each cell type in the reference dataset...
        whichCT = get.ct.ps(cellTypesToConsider = colnames(ctd[[annotLevel]]$specificity),
                            specificity         = ctd[[annotLevel]]$specificity,
                            hitGenes            = hits,
                            patchCellI          = i,
                            patchSeqExp         = patchdata_Reduced,
                            bg                  = bg_mouse)
        
        if(i==1){ allRes=whichCT }else{ allRes = rbind(allRes,whichCT) }
        
        # If more than one cell type has p<0.1, then double check the result
        thresh = 0.1
        loopCount=0
        while(sum(whichCT<thresh)>1){
            loopCount=loopCount+1
            if(loopCount>5){break()}
            inGame  = names(whichCT[whichCT<thresh])
            subSpec = ctd[[annotLevel]]$specificity[,inGame]
            subSpec = subSpec[!apply(subSpec,1,sum)==0,]
            subSpec = subSpec/apply(subSpec,1,sum)
            hits = hits[hits %in% rownames(subSpec)]
            
            whichCT2 = rep(1,length(inGame))
            names(whichCT2) = inGame
            whichCT2 = get.ct.ps(cellTypesToConsider = inGame,
                                 specificity         = subSpec,
                                 hitGenes            = hits,
                                 patchCellI          = i,
                                 patchSeqExp         = patchdata_Reduced,
                                 bg                  = bg_mouse[bg_mouse %in% rownames(subSpec)])
            whichCT[names(whichCT2)]=whichCT2
            print("loop")
        }
        
        if(i==1){
            fullRes=whichCT
        }else{
            fullRes = rbind(fullRes,whichCT)
        }
        
        
        print(sort(whichCT))
        assigned[i] = names(sort(whichCT)[1])
        print(assigned[i])
        print(i)
    }    
    allRes = list(assigned=assigned,fullRes=fullRes)
}
NathanSkene/PatchseqMap documentation built on May 17, 2019, 1:35 p.m.