R/scATAC.R

Defines functions merge_group merge_extent_footprints2 get_genes generate_scATAC_Candid Get_p2g_fun

Documented in generate_scATAC_Candid Get_p2g_fun

#' Filter peak-to-gene link
#' @description Extract peak to gene links data.frame from ArchR object, then identify
#' significant links based on corCutOff, FDRCutOff, varCutOffATAC and varCutOffRNA.
#' @param ArchR_obj ArchR object with Peak2GeneLinks information in peakset slot
#' @param corCutOff numeric, correlation between each peak and gene to refine p2g
#' @param FDRCutOff numeric, FDR of peak to gene link to refine p2g
#' @param varCutOffATAC numeric, VarQATAC to refine p2g
#' @param varCutOffRNA numeric, VarQRNA to refine p2g
#'
#' @return
#' @export
#'
#' @examples
Get_p2g_fun <- function(ArchR_obj,corCutOff = 0.20,FDRCutOff = 1e-6,
                        varCutOffATAC = 0.7,varCutOffRNA = 0.3){
  validInput(corCutOff,'corCutOff','numeric')
  validInput(FDRCutOff,'FDRCutOff','numeric')
  validInput(varCutOffATAC,'varCutOffATAC','numeric')
  validInput(varCutOffRNA,'varCutOffRNA','numeric')

  p2g <- metadata(ArchR_obj@peakSet)$Peak2GeneLinks
  p2g <- p2g[which(abs(p2g$Correlation) >= corCutOff & p2g$FDR <= FDRCutOff), ,drop=FALSE]
  if(!is.null(varCutOffATAC)){
    p2g <- p2g[which(p2g$VarQATAC > varCutOffATAC),]
  }
  if(!is.null(varCutOffRNA)){
    p2g <- p2g[which(p2g$VarQRNA > varCutOffRNA),]
  }
  mATAC <- readRDS(metadata(p2g)$seATAC)[p2g$idxATAC, ]
  mRNA <- readRDS(metadata(p2g)$seRNA)[p2g$idxRNA, ]
  p2g$peak <- paste0(rowRanges(mATAC))
  p2g$gene <- rowData(mRNA)$name
  return(p2g)
}



#' Generate scATAC Candid list
#' @description
#' @param p2g peak to gene link file, generated by 'addPeak2GeneLinks' function
#' in ArchR R package
#' @param combined_footprints footprints file, generated by combine_footprints()
#' @param Kmeans_clustering_ENS Kmeans result data.frame, the first column should be 'Symbol'
#'
#' @return
#' @export
#'
#' @examples
generate_scATAC_Candid <- function(p2g,combined_footprints,Kmeans_clustering_ENS){
  validInput(combined_footprints,'combined_footprints','df')
  validInput(Kmeans_clustering_ENS,'Kmeans_clustering_ENS','df')
  if (!is(CM_0H_48H_p2g,"DFrame")) {
    stop('Input value for p2g is not a DFrame, please supply valid input!')
  }
  pg <- data.frame(p2g@listData[["peak"]],p2g@listData[["gene"]])
  pg <- pg[pg[,2]%in%Kmeans_clustering_ENS[,1],]
  colnames(pg) <- c('peak','gene')
  pg <- pg[order(pg$peak),]
  colnames(pg) <- c('peak','gene')
  peak_grouped <- dplyr::group_by(pg,peak)
  peak_grouped$gene<- paste0(peak_grouped$gene,'|Unknown')
  peak_all_gene=dplyr::group_map(peak_grouped,~get_genes(.x))
  peak_all_gene <-unlist(peak_all_gene)
  peak_name <- pg[!duplicated(pg$peak),1]
  peak_region <- unlist(strsplit(peak_name,':'))
  index_chr <- seq(1,length(peak_region),2)
  peak_region_chr <- peak_region[index_chr]
  peak_region_se <- peak_region[-index_chr]
  peak_data <- as.data.frame(t(as.data.frame(strsplit(peak_region_se,'-'))))
  peak_data$chr <- peak_region_chr
  peak_data <- peak_data[,c(3,1,2)]
  overlapped <- overlap_footprints_peaks(combined_footprints,peak_data)
  footprintslist <- merge_extent_footprints(overlapped, Tranfac201803_Mm_MotifTFsF)
  group_peak3=merge_extent_footprints2(overlapped, Tranfac201803_Mm_MotifTFsF)
  footprintslist[[2]]$strand <- peak_all_gene[match(paste(group_peak3[,8]
                                                                 ,group_peak3[,9],
                                                                 group_peak3[,10]),
                                                           paste(peak_data[,1],
                                                                 peak_data[,2],
                                                                 peak_data[,3]))]
  return(footprintslist)
}



get_genes <- function(data1){
  gene<-paste(as.character(data1$gene),collapse = ';')
  return(gene)
}

merge_extent_footprints2 <- function(file1, motif1) {
  file1 <- file1[file1[,7] %in% motif1$Accession,]
  peak_region <- paste(as.character(file1[,1]), file1[,2],
                       file1[,3], file1[,4], sep = "\t")
  file1$peak_region <- peak_region
  file1$tf <- motif1[match(file1[,7],motif1[,1]),5]
  group_peak <- dplyr::group_by(file1,peak_region)
  group_peak <- group_peak[order(group_peak$peak_region),]
  group_peak2 <-dplyr::group_map(group_peak,~merge_group(.x))
  group_peak3 <- as.data.frame(group_peak[!duplicated(group_peak$peak_region),])
  return(group_peak3)
}

merge_group <- function(data1){
  data1 <- as.data.frame(data1)
  motif <- as.character(data1[,7])
  related_gene <- as.character(data1[,11])
  mg <- paste(motif,related_gene,sep = ';',collapse = '|')
  return(mg)
}
jiang-junyao/IReNA documentation built on Nov. 4, 2024, 8:29 p.m.