###### Co-localization Methods for EvoWeaver #####
# author: Aidan Lakshman
# contact: ahl27@pitt.edu
#### Implemented Methods: ####
# - Naive GeneDistance
##########################
#### S3 Generic Definitions ####
GeneDistance <- function(ew, ...) UseMethod('GeneDistance')
MoransI <- function(ew, ...) UseMethod('MoransI')
OrientationMI <- function(ew, ...) UseMethod('OrientationMI')
################################
GeneDistance.EvoWeaver <- function(ew, Subset=NULL, Verbose=TRUE,
precalcProfs=NULL, precalcSubset=NULL,
minimumGenomeSize=2500,
CombinePVal=TRUE, ...){
if (!is.null(precalcSubset))
subs <- precalcSubset
else
subs <- ProcessSubset(ew, Subset)
uvals <- subs$uvals
evalmap <- subs$evalmap
stopifnot('Colocalization is disabled.'=attr(ew,'useColoc'))
if (attr(ew, 'useMT')){
labvecs <- lapply(ew[uvals], labels)
} else {
labvecs <- ew[uvals]
}
l <- length(labvecs)
allsp <- lapply(labvecs, \(x) gsub('([^_]+)_.*$', '\\1', x))
n <- names(ew)
ARGS <- list(allsp=allsp)
FXN <- function(lab1, lab2, ARGS, ii, jj){
l1sp <- gsub('([^_]+)_.*$', '\\1', lab1)
l2sp <- gsub('([^_]+)_.*$', '\\1', lab2)
l1chrom <- gsub("([^_]+_[^_]+)_.*", '\\1', lab1)
l2chrom <- gsub("([^_]+_[^_]+)_.*", '\\1', lab2)
shared <- intersect(l1sp, l2sp)
if(length(shared) == 0) return(ifelse(CombinePVal, 0, 0+0i))
score <- numeric(length(shared))
for ( k in seq_along(shared) ){
spk <- shared[k]
match1 <- match(spk, l1sp)
match2 <- match(spk, l2sp)
vec1 <- lab1[match1]
chrom1 <- l1chrom[match1]
vec2 <- lab2[match2]
chrom2 <- l2chrom[match2]
idx1 <- as.integer(gsub('.*_([0-9]*)$', '\\1', vec1, useBytes=TRUE))
idx2 <- as.integer(gsub('.*_([0-9]*)$', '\\1', vec2, useBytes=TRUE))
# Double loop is SIGNIFICANTLY faster than expand.grid
# ...even though it looks so gross :(
diffs <- Inf
for(i in seq_along(idx1)){
for(j in seq_along(idx2)){
if(chrom1[i] == chrom2[j]){
diffs <- min(diffs, abs(idx1[i] - idx2[j]))
}
}
}
if(diffs==0) diffs <- 1
score[k] <- ifelse(is.infinite(diffs), 0, exp(1-diffs))
}
pval <- 0
if(length(score) > 0){
rawscore <- -1*log(score)
rawscore[is.infinite(rawscore)] <- minimumGenomeSize
pvals <- (1-(rawscore / minimumGenomeSize))**2
pvals <- pmax(pvals, 0)
if(CombinePVal)
score <- mean(score * pvals)
else
score <- complex(real=mean(score), imaginary=mean(pvals))
} else {
score <- ifelse(CombinePVal, 0, 0+0i)
}
return(score)
}
pairscores <- BuildSimMatInternal(labvecs, uvals, evalmap, l, n,
FXN, ARGS, Verbose, CombinePVal,
InputIsList=TRUE)
if(CombinePVal){
m <- ifelse(max(pairscores, na.rm=TRUE) != 0, max(pairscores,na.rm=TRUE), 1)
pairscores <- pairscores / m
Diag(pairscores) <- 1
} else {
Diag(pairscores) <- 1+1i
}
return(pairscores)
}
MoransI.EvoWeaver <- function(ew, Subset=NULL, Verbose=TRUE,
MySpeciesTree=NULL,
precalcProfs=NULL, precalcSubset=NULL,
CombinePVal=TRUE, ...){
if (!is.null(precalcSubset))
subs <- precalcSubset
else
subs <- ProcessSubset(ew, Subset)
uvals <- subs$uvals
evalmap <- subs$evalmap
stopifnot('Colocalization is disabled.'=attr(ew,'useColoc'))
if ( is.null(MySpeciesTree) || !is(MySpeciesTree, 'dendrogram')){
MySpeciesTree <- findSpeciesTree(ew, Verbose)
}
stopifnot('Missing MySpeciesTree'=!is.null(MySpeciesTree))
stopifnot('MySpeciesTree must be a dendrogram'=is(MySpeciesTree, 'dendrogram'))
if (attr(ew, 'useMT')){
labvecs <- lapply(ew[uvals], labels)
} else {
labvecs <- ew[uvals]
}
l <- length(labvecs)
specCoph <- as.matrix(fastCoph(MySpeciesTree))
allsp <- lapply(labvecs, \(x) gsub('(.*)_.*$', '\\1', x))
n <- names(ew)
ARGS <- list(allsp=allsp, cMat=specCoph)
FXN <- function(lab1, lab2, ARGS, ii, jj){
l1sp <- gsub('([^_]*_[0-9]*)_.*$', '\\1', lab1)
l2sp <- gsub('([^_]*_[0-9]*)_.*$', '\\1', lab2)
shared <- intersect(l1sp, l2sp)
if (length(shared) <= 3){
return(ifelse(CombinePVal, 0, 0+0i))
}
score <- 0
vals <- rep(NA_real_, length(shared))
for ( k in seq_along(shared) ){
spk <- shared[k]
vec1 <- lab1[match(spk, l1sp)]
vec2 <- lab2[match(spk, l2sp)]
idx1 <- as.integer(gsub('.*_([0-9]*)$', '\\1', vec1, useBytes=TRUE))
idx2 <- as.integer(gsub('.*_([0-9]*)$', '\\1', vec2, useBytes=TRUE))
diffs <- 0
vals[k] <- min(abs(c(outer(as.integer(idx1), as.integer(idx2), '-'))))
vals[k] <- exp(-vals[k])
# exp(-sqrt(vals[k])) is a little better but feels overfit-y
}
if(all(vals==vals[1])){
# if there's 100% consistency it'll break MoransI,
# but honestly at that point isn't that great
# evidence of colocalization?
return(ifelse(CombinePVal, 1, 1+1i))
}
shared <- gsub('([^_]*).*', '\\1', shared)
w <- ARGS$cMat
w <- w[shared, shared]
w <- as.dist(exp(-w))
# two.sided or less ?
# 1-greater is better
res <- MoranI(vals, w, alternative = 'greater')
#score <- mean(vals)
score <- res$observed - res$expected
pval <- 1-res$p.value
return(ifelse(CombinePVal, score*pval, complex(real=score, imaginary=pval)))
}
pairscores <- BuildSimMatInternal(labvecs, uvals, evalmap, l, n,
FXN, ARGS, Verbose, CombinePVal,
InputIsList=TRUE)
#m <- ifelse(max(pairscores, na.rm=TRUE) != 0, max(pairscores,na.rm=TRUE), 1)
#pairscores <- pairscores / m
Diag(pairscores) <- ifelse(CombinePVal, 1, 1+1i)
return(pairscores)
}
OrientationMI.EvoWeaver <- function(ew, Subset=NULL, Verbose=TRUE,
precalcProfs=NULL, precalcSubset=NULL,
CombinePVal=TRUE, ...){
stopifnot('Some labels are missing strand identifiers!'=attr(ew, 'useStrand'))
if (!is.null(precalcSubset))
subs <- precalcSubset
else
subs <- ProcessSubset(ew, Subset)
uvals <- subs$uvals
evalmap <- subs$evalmap
stopifnot('Colocalization is disabled.'=attr(ew,'useColoc'))
if (attr(ew, 'useMT')){
labvecs <- lapply(ew[uvals], labels)
} else {
labvecs <- ew[uvals]
}
l <- length(labvecs)
allsp <- lapply(labvecs, \(x) gsub('([^_]+)_.*$', '\\1', x))
n <- names(ew)
ARGS <- list(allsp=allsp)
FXN <- function(lab1, lab2, ARGS, ii, jj){
l1sp <- gsub('([^_]*_[^_]*)_.*$', '\\1', lab1)
l2sp <- gsub('([^_]*_[^_]*)_.*$', '\\1', lab2)
shared <- intersect(l1sp, l2sp)
if(length(shared) == 0) return(ifelse(CombinePVal, 0, 0+0i))
score <- 0
vals <- rep(NA_real_, length(shared))
## a pseudocount of 1 is added
conttable <- matrix(rep(1,4), nrow=2)
for ( k in seq_along(shared) ){
spk <- shared[k]
vec1 <- lab1[match(spk, l1sp)]
vec2 <- lab2[match(spk, l2sp)]
idx1 <- as.integer(gsub('.*_([01])_[0-9]*$', '\\1', vec1, useBytes=TRUE))
idx2 <- as.integer(gsub('.*_([01])_[0-9]*$', '\\1', vec2, useBytes=TRUE))
gene1 <- as.integer(gsub('.*_([0-9]*)$', '\\1', vec1, useBytes=TRUE))
gene2 <- as.integer(gsub('.*_([0-9]*)$', '\\1', vec2, useBytes=TRUE))
for (x in seq_along(idx1)){
if(gene1[x] > gene2[x]){
v1 <- idx2[x]+1
v2 <- idx1[x]+1
} else {
v1 <- idx1[x]+1
v2 <- idx2[x]+1
}
conttable[v1,v2] <- conttable[v1,v2]+1
}
}
totalc <- sum(conttable)
px1 <- sum(conttable[1,]) / totalc
py1 <- sum(conttable[1,]) / totalc
pmf <- conttable / totalc
mutinf <- 0
vx1 <- rep(c(px1, 1-px1), times=2)
vy1 <- rep(c(py1, 1-py1), 2)
jointent <- 0
for(i in seq_len(4L)){
if(pmf[i] != 0){
mutinf <- mutinf +
ifelse(i%in%2:3,-1,1)*pmf[i]*log2(pmf[i] / (vx1[i]*vy1[i]))
jointent <- jointent + pmf[i]*log2(pmf[i])
}
}
# mutinf <- pmf[1,1]*log2(pmf[1,1] / (px1*py1)) +
# -1*pmf[1,2]*log2(pmf[1,2] / (px1*(1-py1))) +
# -1*pmf[2,1]*log2(pmf[2,1] / ((1-px1)*py1)) +
# pmf[2,2]*log2(pmf[2,2] / ((1-px1)*(1-py1)))
#jointentropy <- -1*sum(pmf*log2(pmf))
mutinf <- ifelse(jointent==0, mutinf, mutinf / jointent)
pval <- 1-fisher.test(conttable)$p.value
mutinf <- abs(mutinf)
return(ifelse(CombinePVal, mutinf*pval, complex(real=mutinf,imaginary=pval)))
}
pairscores <- BuildSimMatInternal(labvecs, uvals, evalmap, l, n,
FXN, ARGS, Verbose, CombinePVal,
InputIsList=TRUE)
#m <- ifelse(max(pairscores, na.rm=TRUE) != 0, max(pairscores,na.rm=TRUE), 1)
#pairscores <- pairscores / m
Diag(pairscores) <- ifelse(CombinePVal, 1, 1+1i)
return(pairscores)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.