#' Map PatchSeq
#'
#' Takes celltype specificity data ('ctd') and patchseq expression data ('patchdata') and returns which cell type each patchseq cell maps onto
#'
#' @param ctd Cell type specificity data as generated by EWCE
#' @param patchdata Gene by cell matrix of patchseq data
#' @param sdThresh Genes from the patchseq data will only be kept if they have a standard devation across the dataset larger than this figure
#' @param annotLevel Annotation level of ctd to use
#' @param useSDthresh Should standard deviation or total reads be used to determine which patchseq genes to keep?
#' @param reps How many bootstrap replicates to use. Ideally 10000.
#'
#' @return A list with assigned and fullRes
#'
#' @examples
#' map.patchseq(ctd,patchdata_Reduced,sdThresh=1,annotLevel=2,useSDthresh=TRUE,reps=10000)
#'
#' @export
map.patchseq <- function(ctd,patchdata,sdThresh=1,annotLevel=2,useSDthresh=TRUE,reps=10000){
# Reduce the number of genes in the PatchSeq data to use for classification
if(useSDthresh){
# ... Either use the most variable (sd>1)
patchdata_Reduced = patchdata[apply(patchdata,1,sd)>sdThresh,]
}else{
# ... Or those with over 200 reads across all the cells
patchdata_Reduced = patchdata[apply(patchdata,1,sum)>200,]
}
# 'assigned' will store the celltype assigned to each patchseq cell
assigned = rep("",dim(patchdata_Reduced)[2])
# Find all genes shared between the patchseq dataset and the celltype specificity dataset
bg_mouse=rownames(patchdata_Reduced)[rownames(patchdata_Reduced) %in% rownames(ctd[[annotLevel]]$specificity)]
# For each cell in the patchseq data...
for(i in 1:dim(patchdata_Reduced)[2]){
# Which genes are expressed in patchseq cell i? (with at least one read)
genesInCell = rownames(patchdata_Reduced)[patchdata_Reduced[,i]>1]
hits=genesInCell[genesInCell %in% bg_mouse]
whichCT = rep(1,length(colnames(ctd[[annotLevel]]$specificity)))
names(whichCT) = colnames(ctd[[annotLevel]]$specificity)
# Compare the Patchseq cell to each cell type in the reference dataset...
whichCT = get.ct.ps(cellTypesToConsider = colnames(ctd[[annotLevel]]$specificity),
specificity = ctd[[annotLevel]]$specificity,
hitGenes = hits,
patchCellI = i,
patchSeqExp = patchdata_Reduced,
bg = bg_mouse)
if(i==1){ allRes=whichCT }else{ allRes = rbind(allRes,whichCT) }
# If more than one cell type has p<0.1, then double check the result
thresh = 0.1
loopCount=0
while(sum(whichCT<thresh)>1){
loopCount=loopCount+1
if(loopCount>5){break()}
inGame = names(whichCT[whichCT<thresh])
subSpec = ctd[[annotLevel]]$specificity[,inGame]
subSpec = subSpec[!apply(subSpec,1,sum)==0,]
subSpec = subSpec/apply(subSpec,1,sum)
hits = hits[hits %in% rownames(subSpec)]
whichCT2 = rep(1,length(inGame))
names(whichCT2) = inGame
whichCT2 = get.ct.ps(cellTypesToConsider = inGame,
specificity = subSpec,
hitGenes = hits,
patchCellI = i,
patchSeqExp = patchdata_Reduced,
bg = bg_mouse[bg_mouse %in% rownames(subSpec)])
whichCT[names(whichCT2)]=whichCT2
print("loop")
}
if(i==1){
fullRes=whichCT
}else{
fullRes = rbind(fullRes,whichCT)
}
print(sort(whichCT))
assigned[i] = names(sort(whichCT)[1])
print(assigned[i])
print(i)
}
allRes = list(assigned=assigned,fullRes=fullRes)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.