###### -- Select the true-est ortholog ----------------------------------------
# author: nicholas cooley
# maintainer: nicholas cooley
# contact: npc19@pitt.edu
# hunger games of predicted pairs
CompetePairs <- function(SynExtendObject,
AllowCrossContigConflicts = TRUE, # if EITHER assembly isn't 'complete' this should be FALSE
By = "PID",
PollContext = TRUE,
NormalizeCompetition = TRUE,
InflationParameter = .975,
Verbose = FALSE) {
# start with timing
if (Verbose) {
pBar <- txtProgressBar(style = 1)
FunctionTimeStart <- Sys.time()
}
# unit normalize if asked for...
NormScore <- function(vec) {
vec <- vec / sqrt(sum(vec^2))
return(vec)
}
# overhead checking
if (!is(object = SynExtendObject,
class2 = "PairSummaries")) {
stop ("SynExtendObject must be an object of class 'PairSummaries'.")
}
# i need to build a class file for this class, until then we need to do this:
attr_vals <- attributes(SynExtendObject)
attr_names <- names(attr_vals)
# if i allow polling of context, i need to make sure that
if (PollContext) {
if (!("Block_UID" %in% colnames(SynExtendObject))) {
stop ("Block unique IDs are missing")
}
}
if (length(By) != 1) {
stop ("Competition between conflicting pairs can only currently be resolved with a single colname.")
}
# check the compete by argument
if (!(By %in% colnames(SynExtendObject))) {
stop ("Competition between conflicting pairs can only be resolved with an available colname.")
}
if (PollContext & NormalizeCompetition) {
# bind a context score to easily access later
ContextScore <- tapply(X = NormScore(SynExtendObject[, By]),
INDEX = SynExtendObject$Block_UID,
FUN = function(x) {
sum(x) / (length(x) / InflationParameter)
})
SynExtendObject <- cbind(SynExtendObject,
"CompeteBy" = ContextScore[match(x = SynExtendObject$Block_UID,
table = as.integer(names(ContextScore)))])
} else if (PollContext & !NormalizeCompetition) {
# bind a context score to easily access later
ContextScore <- tapply(X = SynExtendObject[, By],
INDEX = SynExtendObject$Block_UID,
FUN = function(x) {
sum(x) / (length(x) / InflationParameter)
})
SynExtendObject <- cbind(SynExtendObject,
"CompeteBy" = ContextScore[match(x = SynExtendObject$Block_UID,
table = as.integer(names(ContextScore)))])
} else if (!PollContext & NormalizeCompetition) {
SynExtendObject <- cbind(SynExtendObject,
"CompeteBy" = NormScore(SynExtendObject[, By]))
} else if (!PollContext & !NormalizeCompetition) {
SynExtendObject <- cbind(SynExtendObject,
"CompeteBy" = SynExtendObject[, By])
}
res01 <- vector(mode = "character",
length = nrow(SynExtendObject))
res02 <- rep(TRUE,
nrow(SynExtendObject))
# divy object up into comparison sections to be evaluated
if (AllowCrossContigConflicts) {
# split the object based on genome_contig_
row_reps <- paste(gsub(pattern = "_[0-9]+$",
replacement = "",
x = SynExtendObject$p1),
gsub(pattern = "_[0-9]+$",
replacement = "",
x = SynExtendObject$p2),
sep = "_")
row_key <- unique(row_reps)
# return(list("rep" = row_reps,
# "key" = row_key,
# "origin" = SynExtendObject))
} else {
# split the object based on genome_
row_reps <- paste(gsub(pattern = "_[0-9+]_[0-9]+$",
replacement = "",
x = SynExtendObject$p1),
gsub(pattern = "_[0-9]+_[0-9]+$",
replacement = "",
x = SynExtendObject$p2),
sep = "_")
row_key <- unique(row_reps)
# return(list("rep" = row_reps,
# "key" = row_key,
# "origin" = SynExtendObject))
}
eval_sets <- split(x = SynExtendObject,
f = row_reps)
eval_sets <- lapply(X = eval_sets,
FUN = function(x) {
class(x) <- c("data.frame",
"PairSummaries")
return(x)
})
# return(eval_sets)
if (Verbose) {
PBAR <- length(eval_sets)
}
# loop through our evaluation sets, return some kind of vector or other object
# that tells me who to knock out, and *who it was knocked out by*
for (m1 in seq_along(eval_sets)) {
current_sets <- DisjointSet(Pairs = eval_sets[[m1]],
Verbose = FALSE)
communities01 <- rep(as.integer(names(current_sets)),
lengths(current_sets))
names(communities01) <- unlist(current_sets)
communities02 <- unname(communities01[match(x = eval_sets[[m1]]$p1,
table = names(communities01))])
# return(list("set" = current_sets,
# "communities01" = communities01,
# "communities02" = communities02))
pairs_by_community <- split(x = eval_sets[[m1]],
f = communities02)
# row names here are relative to ...
conflict_pairs <- pairs_by_community[vapply(X = pairs_by_community,
FUN = function(x) {
nrow(x) > 1
},
FUN.VALUE = vector(mode = "logical",
length = 1))]
if (length(conflict_pairs) > 0) {
conflict_rows <- unlist(unname(lapply(X = conflict_pairs,
FUN = function(x) {
rownames(x)
})))
keep <- vector(mode = "list",
length = length(conflict_pairs))
winner <- vector(mode = "character",
length = length(conflict_pairs))
# return a list of vectors of logicals and a vector of characters that indicates
# WHO won the competition
# we can propogate these back to the original data frame based on the rownames
for (m2 in seq_along(conflict_pairs)) {
# return(list("current" = eval_sets[[m1]],
# "conflict" = conflict_pairs[[m2]]))
winner[m2] <- rownames(conflict_pairs[[m2]])[which.max(conflict_pairs[[m2]]$CompeteBy)]
keep[[m2]] <- rownames(conflict_pairs[[m2]]) %in% winner[m2]
# if (m1 == 2 &
# m2 == 1) {
# return(list("origin" = SynExtendObject,
# "current" = eval_sets[[m1]],
# "conflicts" = conflict_pairs[[m2]],
# "winner" = winner[m2],
# "keep" = keep[[m2]]))
# }
}
# return(list("origin" = SynExtendObject,
# "current" = eval_sets[[m1]],
# "conflicts" = conflict_pairs,
# "row_key" = conflict_rows,
# "winner" = winner,
# "keep" = keep))
winner <- rep(winner,
lengths(keep))
# fill in which row knocked you out if you were knocked out
res01[match(x = conflict_rows,
table = rownames(SynExtendObject))] <- winner
# fill in whether you were knocked out
res02[match(x = conflict_rows,
table = rownames(SynExtendObject))] <- unlist(keep)
# post conflict, see if any of the removed pairs have BOTH PARTNERS
# missing in the winners set, if so add them back
# this needs to be rewritten as some kind of competition itself
# check_conflicts <- do.call(rbind,
# conflict_pairs)
# retained_p1 <- check_conflicts$p1[unlist(keep)]
# retained_p2 <- check_conflicts$p2[unlist(keep)]
#
# rescue_p1 <- !(check_conflicts$p1 %in% retained_p1)
# rescue_p2 <- !(check_conflicts$p2 %in% retained_p2)
#
# if (any(rescue_p1 & rescue_p2)) {
# # return(list("a" = SynExtendObject,
# # "b" = eval_sets[[m1]],
# # "c" = check_conflicts[, c(1:2,11:12,16)],
# # "d" = unlist(keep),
# # "e" = list(retained_p1,
# # retained_p2),
# # "f" = list(rescue_p1,
# # rescue_p2)))
# res02[match(x = conflict_rows,
# table = rownames(SynExtendObject))][which(rescue_p1 & rescue_p2)] <- TRUE
# }
} # end check for checkable conflicts
if (Verbose) {
setTxtProgressBar(pb = pBar,
value = m1 / PBAR)
}
}
if (Verbose) {
close(pBar)
}
# until the pair summaries class has a class file i need to do this...
attrs_to_add <- attr_names[!(attr_names) %in% names(attributes(SynExtendObject))]
# return(list("a" = attr_names,
# "b" = attr_vals,
# "c" = attrs_to_add,
# "d" = SynExtendObject))
for (m1 in seq_along(attrs_to_add)) {
attr(x = SynExtendObject,
which = attrs_to_add[m1]) <- attr_vals[[attrs_to_add[m1]]]
}
attr(x = SynExtendObject,
which = "RetainByCompetition") <- res02
attr(x = SynExtendObject,
which = "Knockout") <- res01
class(SynExtendObject) <- c("data.frame", "PairSummaries")
if (Verbose) {
FunctionTimeEnd <- Sys.time()
print(FunctionTimeEnd - FunctionTimeStart)
}
return(SynExtendObject)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.