select_target_gene<-function(rds_object, bc_frame,perturb_gene, non_target_ctrl,
assay_for_expcor='MAGIC_RNA', # assays used for estimating RNAs
min_gene_num=10,
max_gene_num=500,
logfc.threshold=0.1,
min_abs_diff=0.1,
perturb_gene_exp_id=NULL){
# old method: directly use the "gene" column in Seurat metadata
# note: not working for high moi screens
#deframe=FindMarkers(rds_object,ident.1=perturb_gene,ident.2 = non_target_ctrl,
# group.by = 'gene',logfc.threshold = logfc.threshold)
if (!'gene'%in% colnames(bc_frame) | ! 'cell'%in% colnames(bc_frame)){
stop('barcode frame must include columns named gene or cell.')
}
cell_perturb=bc_frame[bc_frame$gene%in%perturb_gene,'cell']
cell_ctrl=bc_frame[bc_frame$gene %in% non_target_ctrl,'cell']
cell_perturb=cell_perturb[!cell_perturb%in% cell_ctrl]
cell_perturb=cell_perturb[cell_perturb %in% Cells(rds_object)]
cell_ctrl=cell_ctrl[cell_ctrl %in% Cells(rds_object)]
if(length(cell_perturb)<10){
if(length(cell_perturb)==0){
stop(paste('No cells express sgRNAs targeting perturbed gene:',perturb_gene,'. Check whether (1) your cell names in barcode file match cell names in your Seurat object; and (2) there are cells that express sgRNAs targeting ',perturb_gene,'.'))
}else{
warnings(paste('Only ',length(cell_perturb),'cells express sgRNAs targeting perturbed gene:',perturb_gene))
}
}
if(length(cell_ctrl)<10){
if(length(cell_ctrl)==0){
stop(paste('No cells express negative control sgRNAs:',non_target_ctrl,'. Check whether (1) your cell names in barcode file match cell names in your Seurat object; and (2) there are cells that express sgRNAs targeting ',non_target_ctrl,'.'))
}else{
warnings(paste('Only ',length(cell_ctrl),'cells express sgRNAs targeting perturbed gene:',non_target_ctrl))
}
}
cells_perturb_meta=rep('Others',length(Cells(rds_object)))
names(cells_perturb_meta)=Cells(rds_object)
cells_perturb_meta[cell_perturb]='Perturbed'
cells_perturb_meta[cell_ctrl]='Ctrl'
rds_object=AddMetaData(rds_object,cells_perturb_meta,col.name='scmageck_perturbed_marker')
# automatically adjust the number of min genes
nround=0
while(TRUE){
deframe=FindMarkers(rds_object,ident.1='Perturbed',indent.2='Ctrl',
group.by='scmageck_perturbed_marker',
logfc.threshold = logfc.threshold)
target_gene_list=rownames(deframe)
message(paste('DE analysis identified ',nrow(deframe),'DE genes that will be used for target genes.'))
nround = nround + 1
if(nround >=3 | nrow(deframe) >= min_gene_num){
break
}
logfc.threshold = logfc.threshold * 0.8
message(paste('Automatically adjusting the logfc.threshold to ',logfc.threshold,' to search for more target genes.'))
}
corfr_sel=NULL
if(nrow(deframe)>max_gene_num){
deframe=deframe[order(deframe$p_val),]
target_gene_list=rownames(deframe)[1:max_gene_num]
message(paste('Select',max_gene_num,'genes'))
}else{
if(nrow(deframe)<min_gene_num & !is.null(perturb_gene_exp_id) & FALSE){ # this has been disabled
# select genes from expression correlation
message('Not enough genes from differential expression. Select genes from correlation...')
targetls<-get_exp_cor_frame(rds_object,perturb_gene,non_target_ctrl,
assay_for_expcor = assay_for_expcor,
cell1s=cell_perturb,cell_ctrl=cell_ctrl,
perturb_gene_exp_id=perturb_gene_exp_id)
corfr<-targetls$cordata
corfr_sel=subset(corfr, abs(cor_target)-abs(cor_ctrl)>min_abs_diff)
corfr_sel=corfr_sel[order(abs(corfr_sel$cor_ctrl),decreasing = T),]
remain_gene=min(nrow(corfr_sel),min_gene_num-nrow(deframe))
message(paste('Remaining',remain_gene,'genes selected..'))
target_gene_list=c(target_gene_list,corfr_sel$gene[1:remain_gene])
}
}
return (list(target_gene_list=target_gene_list,
deframe=deframe,
corframe=corfr_sel))
}
TRUE
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.