## Functions for linking causal SNPs to genes after gChromVAR analysis
#' Import causal snp bed files
#'
#' @param files Paths of bed files to import
#' @param colidx Index of the posterior probability column
#'
#' @return Causal SNPs as GRanges objects
#'
#' @import GenomicRanges
#' @export
#'
ImportCausalSNPs <- function(files, colidx = 5) {
lapply(files, function(file) {
# Import Bed
hitdf <- read.table(file)
stopifnot(dim(hitdf)[2] >= colidx)
stopifnot(3 < colidx)
gdf <- hitdf[,c(1,2,3,colidx)]
#Make GRanges
names(gdf) <- c("chr", "start", "end", "PP")
makeGRangesFromDataFrame(gdf, keep.extra.columns = T)
})
}
#' Identify most impactful SNPs for a given trait/cell type combo
#'
#' @param SE Accessibility counts as a Summarized Experiment object
#' @param traits Posterior probability score of each peak for every trait as a Summarized Experiment object
#' @param bg.peaks Matrix of background peaks. Will be computed with getBackgroundPeaks if not specified
#' @param min.z Z-score cutoff for peaks
#' @param min.PP Posterior probability score for peaks
#' @param return.mat Return all z-scores
#'
#' @return List of lists of causal peaks for each trait and cell type. Each trait/cell type pair has a dataframe with the z-score, accessibility deviation, and posterior probability for each causal peak
#'
#' @import GenomicRanges
#' @import Matrix
#' @export
#'
FindCausalPeaks <- function(SE, traits, bg.peaks = NULL, min.z = 1, min.PP = 0.025,
return.mat = F) {
require(chromVAR)
require(gchromVAR)
if(is.null(bg.peaks)) {
bg.peaks <- getBackgroundPeaks(SE)
rownames(bg.peaks) <- rownames(SE)
}
stopifnot(all(rownames(bg.peaks) == rownames(SE)))
stopifnot(all(rownames(traits) == rownames(SE)))
## Get overlap of each peak with SNPs from each trait
trait.scores <- assay(traits, "weights")
rownames(trait.scores) <- rownames(traits)
## Find peaks overlapping with causal SNPs for each trait
causal.peaks <- lapply(colnames(trait.scores), function(trait) {
w <- trait.scores[,trait]
w[w > 0]
})
names(causal.peaks) <- colnames(trait.scores)
causal.peaks <- causal.peaks[sapply(causal.peaks, length) > 0]
## Compute expected counts
counts <- assay(SE, "counts")
cluster.sums <- colSums(counts)
peak.frac <- computeExpectations(SE)
expected.counts <- sapply(cluster.sums, function(x) x*peak.frac)
## Compute deviation from expected counts
dev <- (counts - expected.counts)/expected.counts
## Compute individual peak-cluster z-scores for each trait using background peaks
causal.z <- lapply(names(causal.peaks), function(tr) {
w <- causal.peaks[[tr]]
peaks.use <- names(w)
w.exp <- w*as.matrix(expected.counts[peaks.use,])
w.dev <- (w*as.matrix(counts[peaks.use,]) - w.exp)/w.exp
bg.dev <- lapply(1:ncol(bg.peaks), function(i) {
ctrl.peaks <- rownames(counts)[bg.peaks[peaks.use,i]]
bg.exp <- w*as.matrix(expected.counts[ctrl.peaks,])
(w*as.matrix(counts[ctrl.peaks,]) - bg.exp)/bg.exp
})
bg.dev <- abind::abind(bg.dev, along = 3)
bg.dev.mean <- apply(bg.dev, c(1,2), mean)
bg.dev.sd <- apply(bg.dev, c(1,2), sd)
z <- (w.dev - bg.dev.mean)/bg.dev.sd
z.list <- lapply(colnames(z), function(i) {
df <- data.frame(peak = rownames(z), z = z[,i],
PP = trait.scores[rownames(z),tr],
dev = dev[rownames(z),i])
subset(df[order(df$z, decreasing = T),], z > min.z & PP > min.PP)
})
names(z.list) <- colnames(z)
if (return.mat) {
return(z)
} else {
return(z.list)
}
})
names(causal.z) <- names(causal.peaks)
return(causal.z)
}
#' Find genes coaccessible with causal peaks for each cluster/trait pair
#'
#' @param pheno.cl.use Trait/cluster pairs to analyze, as a character vector of "trait: cluster" elements
#' @param causal.z List of lists of causal peaks for each trait and cell type generated by FindCausalPeaks
#' @param peak.gene.weights Matrix of peak - gene coaccessibilities
#' @param min.weight Minimum coaccessibility to consider
#'
#' @return Matrix describing the coaccessibility of specified trait/cluster pairs with genes
#'
#' @export
#'
FindLinkedGenes <- function(pheno.cl.use, causal.z, peak.gene.weights, min.weight = 0.1) {
## Find peaks associated with each trait/cluster pair
pheno.cluster.peaks <- lapply(pheno.cl.use, function(pheno.cl) {
p <- strsplit(pheno.cl, split = ": ")[[1]][[1]]
cl.use <- strsplit(pheno.cl, split = ": ")[[1]][[2]]
peaks <- unique(as.character(causal.z[[p]][[cl.use]]$peak))
peaks[peaks %in% rownames(peak.gene.weights)]
})
names(pheno.cluster.peaks) <- pheno.cl.use
## Get coaccessible genes as list
pheno.cluster.genes <- lapply(pheno.cluster.peaks, function(peaks) {
if (length(peaks) > 1) {
w <- colSums(as.matrix(peak.gene.weights[peaks,]))
} else if (length(peaks) == 1) {
w <- peak.gene.weights[peaks,]
} else {
w <- c()
}
w[w >= min.weight]
})
all.pheno.genes <- unique(unlist(lapply(pheno.cluster.genes, names), F, F))
## Bind coaccessible gene weights into matrix
pheno.genes.mat <- do.call(cbind, lapply(names(pheno.cluster.genes), function(x) {
w <- pheno.cluster.genes[[x]]
w <- w[names(w) %in% all.pheno.genes]
v <- rep(0, length(all.pheno.genes))
names(v) <- all.pheno.genes
v[names(w)] <- w
v
}))
colnames(pheno.genes.mat) <- pheno.cl.use
return(pheno.genes.mat)
}
#' Find the posterior probability score of causal peaks coaccessible with genes for specified trait/cluster pairs
#'
#' @param pheno.genes.mat Matrix describing the coaccessibility of specified trait/cluster pairs with genes. Generated by FindLinkedGenes.
#' @param causal.z List of lists of causal peaks for each trait and cell type generated by FindCausalPeaks
#' @param peak.gene.weights Matrix of peak - gene coaccessibilities
#'
#' @return Matrix of the sum of posterior probabilities of causal snps driving the coaccessibility of specified trait/cluster pairs with genes
#'
#' @export
#'
FindCausalProb <- function(pheno.genes.mat, causal.z, peak.gene.weights) {
pheno.pp.mat <- sapply(colnames(pheno.genes.mat), function(pheno.cl) {
p <- strsplit(pheno.cl, split = ": ")[[1]][[1]]
cl.use <- strsplit(pheno.cl, split = ": ")[[1]][[2]]
df <- causal.z[[p]][[cl.use]]
peaks <- unique(as.character(causal.z[[p]][[cl.use]]$peak))
peaks[peaks %in% rownames(peak.gene.weights)]
peaks <- peaks[peaks %in% rownames(peak.gene.weights)]
if (length(peaks) > 0) {
w <- sapply(peaks, function(pk) {
x <- peak.gene.weights[pk, rownames(pheno.genes.mat)]
x[x > 0] <- df[pk, "PP"]
x
})
if (!is.vector(w)) w <- rowSums(w)
}
else {
w <- rep(0, nrow(pheno.genes.mat))
names(w) <- rownames(pheno.genes.mat)
}
return(w)
})
pheno.pp.mat <- pheno.pp.mat[rownames(pheno.genes.mat), colnames(pheno.genes.mat)]
pheno.pp.mat[pheno.genes.mat == 0] <- 0
return(pheno.pp.mat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.