#' Filter peak-to-gene link
#' @description Extract peak to gene links data.frame from ArchR object, then identify
#' significant links based on corCutOff, FDRCutOff, varCutOffATAC and varCutOffRNA.
#' @param ArchR_obj ArchR object with Peak2GeneLinks information in peakset slot
#' @param corCutOff numeric, correlation between each peak and gene to refine p2g
#' @param FDRCutOff numeric, FDR of peak to gene link to refine p2g
#' @param varCutOffATAC numeric, VarQATAC to refine p2g
#' @param varCutOffRNA numeric, VarQRNA to refine p2g
#'
#' @return
#' @export
#'
#' @examples
Get_p2g_fun <- function(ArchR_obj,corCutOff = 0.20,FDRCutOff = 1e-6,
varCutOffATAC = 0.7,varCutOffRNA = 0.3){
validInput(corCutOff,'corCutOff','numeric')
validInput(FDRCutOff,'FDRCutOff','numeric')
validInput(varCutOffATAC,'varCutOffATAC','numeric')
validInput(varCutOffRNA,'varCutOffRNA','numeric')
p2g <- metadata(ArchR_obj@peakSet)$Peak2GeneLinks
p2g <- p2g[which(abs(p2g$Correlation) >= corCutOff & p2g$FDR <= FDRCutOff), ,drop=FALSE]
if(!is.null(varCutOffATAC)){
p2g <- p2g[which(p2g$VarQATAC > varCutOffATAC),]
}
if(!is.null(varCutOffRNA)){
p2g <- p2g[which(p2g$VarQRNA > varCutOffRNA),]
}
mATAC <- readRDS(metadata(p2g)$seATAC)[p2g$idxATAC, ]
mRNA <- readRDS(metadata(p2g)$seRNA)[p2g$idxRNA, ]
p2g$peak <- paste0(rowRanges(mATAC))
p2g$gene <- rowData(mRNA)$name
return(p2g)
}
#' Generate scATAC Candid list
#' @description
#' @param p2g peak to gene link file, generated by 'addPeak2GeneLinks' function
#' in ArchR R package
#' @param combined_footprints footprints file, generated by combine_footprints()
#' @param Kmeans_clustering_ENS Kmeans result data.frame, the first column should be 'Symbol'
#'
#' @return
#' @export
#'
#' @examples
generate_scATAC_Candid <- function(p2g,combined_footprints,Kmeans_clustering_ENS){
validInput(combined_footprints,'combined_footprints','df')
validInput(Kmeans_clustering_ENS,'Kmeans_clustering_ENS','df')
if (!is(CM_0H_48H_p2g,"DFrame")) {
stop('Input value for p2g is not a DFrame, please supply valid input!')
}
pg <- data.frame(p2g@listData[["peak"]],p2g@listData[["gene"]])
pg <- pg[pg[,2]%in%Kmeans_clustering_ENS[,1],]
colnames(pg) <- c('peak','gene')
pg <- pg[order(pg$peak),]
colnames(pg) <- c('peak','gene')
peak_grouped <- dplyr::group_by(pg,peak)
peak_grouped$gene<- paste0(peak_grouped$gene,'|Unknown')
peak_all_gene=dplyr::group_map(peak_grouped,~get_genes(.x))
peak_all_gene <-unlist(peak_all_gene)
peak_name <- pg[!duplicated(pg$peak),1]
peak_region <- unlist(strsplit(peak_name,':'))
index_chr <- seq(1,length(peak_region),2)
peak_region_chr <- peak_region[index_chr]
peak_region_se <- peak_region[-index_chr]
peak_data <- as.data.frame(t(as.data.frame(strsplit(peak_region_se,'-'))))
peak_data$chr <- peak_region_chr
peak_data <- peak_data[,c(3,1,2)]
overlapped <- overlap_footprints_peaks(combined_footprints,peak_data)
footprintslist <- merge_extent_footprints(overlapped, Tranfac201803_Mm_MotifTFsF)
group_peak3=merge_extent_footprints2(overlapped, Tranfac201803_Mm_MotifTFsF)
footprintslist[[2]]$strand <- peak_all_gene[match(paste(group_peak3[,8]
,group_peak3[,9],
group_peak3[,10]),
paste(peak_data[,1],
peak_data[,2],
peak_data[,3]))]
return(footprintslist)
}
get_genes <- function(data1){
gene<-paste(as.character(data1$gene),collapse = ';')
return(gene)
}
merge_extent_footprints2 <- function(file1, motif1) {
file1 <- file1[file1[,7] %in% motif1$Accession,]
peak_region <- paste(as.character(file1[,1]), file1[,2],
file1[,3], file1[,4], sep = "\t")
file1$peak_region <- peak_region
file1$tf <- motif1[match(file1[,7],motif1[,1]),5]
group_peak <- dplyr::group_by(file1,peak_region)
group_peak <- group_peak[order(group_peak$peak_region),]
group_peak2 <-dplyr::group_map(group_peak,~merge_group(.x))
group_peak3 <- as.data.frame(group_peak[!duplicated(group_peak$peak_region),])
return(group_peak3)
}
merge_group <- function(data1){
data1 <- as.data.frame(data1)
motif <- as.character(data1[,7])
related_gene <- as.character(data1[,11])
mg <- paste(motif,related_gene,sep = ';',collapse = '|')
return(mg)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.