###### -- summarize seqs ------------------------------------------------------
# author: nicholas cooley
# maintainer: nicholas cooley
# contact: npc19@pitt.edu
# given a LinkedPairs object and a DECIPHER database, return a PairSummaries
# object
# this function will always align, unlike PairSummaries
# TODO:
# implement:
# ShowPlot
# Processors -- partially implemented, at least for AlignPairs
# ellipses
# SearchIndex subsetting to only search non-occupied spaces -- Erik thinks we can avoid this
SummarizePairs <- function(SynExtendObject,
DataBase01,
IncludeIndexSearch = TRUE,
AlignmentFun = "AlignPairs",
RetainAnchors = TRUE,
DefaultTranslationTable = "11",
KmerSize = 5,
IgnoreDefaultStringSet = FALSE,
Verbose = FALSE,
ShowPlot = FALSE,
Processors = 1,
Storage = 2,
IndexParams = list("K" = 6),
SearchParams = list("perPatternLimit" = 1),
...) {
if (Verbose) {
TimeStart <- Sys.time()
pBar <- txtProgressBar(style = 1L)
}
# overhead checking
# object types
if (!is(object = SynExtendObject,
class2 = "LinkedPairs")) {
stop ("'SynExtendObject' is not an object of class 'LinkedPairs'.")
}
Size <- nrow(SynExtendObject)
# if (!is(object = FeatureSeqs,
# class2 = "FeatureSeqs")) {
# stop ("'FeatureSeqs' is not an object of class 'FeatureSeqs'.")
# }
# the FeatureSeqs shortcut is currently disabled, so sequences are always pulled from the DataBase
if (is.character(DataBase01)) {
if (!requireNamespace(package = "RSQLite",
quietly = TRUE)) {
stop("Package 'RSQLite' must be installed.")
}
if (!("package:RSQLite" %in% search())) {
print("Eventually character vector access to DECIPHER DBs will be deprecated.")
requireNamespace(package = "RSQLite",
quietly = TRUE)
}
dbConn <- dbConnect(dbDriver("SQLite"), DataBase01)
on.exit(dbDisconnect(dbConn))
} else {
dbConn <- DataBase01
if (!dbIsValid(dbConn)) {
stop("The connection has expired.")
}
}
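# both input forms are accepted here; a minimal sketch, assuming 'LP' is a LinkedPairs
# object and the sqlite path below is purely illustrative:
# SummarizePairs(SynExtendObject = LP,
#                DataBase01 = "/path/to/decipher.sqlite") # character path
# con <- dbConnect(dbDriver("SQLite"), "/path/to/decipher.sqlite")
# SummarizePairs(SynExtendObject = LP,
#                DataBase01 = con) # live DBI connection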
if (!is.character(AlignmentFun) ||
length(AlignmentFun) > 1) {
stop("AlignmentFun must be either 'AlignPairs' or 'AlignProfiles'.")
}
if (!(AlignmentFun %in% c("AlignPairs",
"AlignProfiles"))) {
stop("AlignmentFun must be either 'AlignPairs' or 'AlignProfiles'.")
}
if (!is.character(DefaultTranslationTable) ||
length(DefaultTranslationTable) > 1) {
stop("DefaultTranslationTable must be a character of length 1.")
}
# check storage
if (Storage < 0) {
stop("Storage must be greater than zero.")
} else {
Storage <- Storage * 1e9 # conversion to gigabytes
}
# deal with Processors, this mimics Erik's error checking
if (!is.null(Processors) && !is.numeric(Processors)) {
stop("Processors must be a numeric.")
}
if (!is.null(Processors) && floor(Processors) != Processors) {
stop("Processors must be a whole number.")
}
if (!is.null(Processors) && Processors < 1) {
stop("Processors must be at least 1.")
}
if (is.null(Processors)) {
Processors <- DECIPHER:::.detectCores()
} else {
Processors <- as.integer(Processors)
}
# deal with user arguments
# ignore 'verbose'
UserArgs <- list(...)
# if (length(UserArgs) > 0) {
# UserArgNames <- names(UserArgs)
# AlignPairsArgs <- formals(AlignPairs)
# AlignPairsArgNames <- names(AlignPairsArgs)
# AlignProfilesArgs <- formals(AlignProfiles)
# AlignProfilesArgNames <- names(AlignProfilesArgs)
# DistanceMatrixArgs <- formals(DistanceMatrix)
# DistanceMatrixArgNames <- names(DistanceMatrixArgs)
#
# APaArgNames <- APrArgNames <- DMArgNames <- vector(mode = "character",
# length = length(UserArgNames))
# for (m1 in seq_along(UserArgNames)) {
# APaArgNames[m1] <- match.arg(arg = UserArgNames[m1],
# choices = AlignPairsArgNames)
# APrArgNames[m1] <- match.arg(arg = UserArgNames[m1],
# choices = AlignProfilesArgNames)
# DMArgNames[m1] <- match.arg(arg = UserArgNames[m1],
# choices = DistanceMatrixArgNames)
# }
# # set the user args for AlignProfiles
# if (any(APaArgNames)) {
#
# }
# # set the user args for AlignPairs
# # set the user args for DistanceMatrix
# }
GeneCalls <- attr(x = SynExtendObject,
which = "GeneCalls")
GeneCallIDs <- names(GeneCalls)
ObjectIDs <- rownames(SynExtendObject)
# when we subset a LinkedPairs object it doesn't smartly handle the gene calls yet...
if (!all(ObjectIDs %in% GeneCallIDs)) {
stop("Function expects all IDs in the SynExtendObject to supplied GeneCalls.")
}
IDMatch <- match(x = ObjectIDs,
table = GeneCallIDs)
GeneCalls <- GeneCalls[IDMatch]
# we're only going to scroll through the cells that are supplied in the object
DataPool <- vector(mode = "list",
length = length(ObjectIDs))
MAT1 <- get(data("HEC_MI1",
package = "DECIPHER",
envir = environment()))
MAT2 <- get(data("HEC_MI2",
package = "DECIPHER",
envir = environment()))
# set initial progress bars and iterators
# PH is the container from which 'res' is eventually constructed,
# while Total tracks the number of progress bar ticks
if (AlignmentFun == "AlignProfiles") {
# progress bar ticks through each alignment
# this is the slower non-default option
Total <- (Size * (Size - 1L)) / 2L
PH <- vector(mode = "list",
length = Total)
Total <- sum(sapply(SynExtendObject[upper.tri(SynExtendObject)],
function(x) nrow(x),
USE.NAMES = FALSE,
simplify = TRUE))
structureMatrix <- matrix(c(0.187, -0.8, -0.873,
-0.8, 0.561, -0.979,
-0.873, -0.979, 0.221),
3,
3,
dimnames=list(c("H", "E", "C"),
c("H", "E", "C")))
substitutionMatrix <- matrix(c(1.5, -2.134, -0.739, -1.298,
-2.134, 1.832, -2.462, 0.2,
-0.739, -2.462, 1.522, -2.062,
-1.298, 0.2, -2.062, 1.275),
nrow = 4,
dimnames = list(DNA_BASES, DNA_BASES))
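# structureMatrix scores pairings of predicted secondary structure states (H/E/C)
# when amino acid alignments are scored below, and substitutionMatrix is the
# analogous nucleotide scoring matrix used when features are aligned as DNA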
# read this message after neighbors and k-mer dists ?
if (Verbose) {
cat("Aligning pairs.\n")
}
} else if (AlignmentFun == "AlignPairs") {
# progress bar ticks through each cell
# this is the faster default option
# technically these alignments have the possibility of not being
# as good as align profiles, but that's a hit we're willing to take
Total <- (Size * (Size - 1L)) / 2L
PH <- vector(mode = "list",
length = Total)
Total <- Total * 2L
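# e.g. with Size = 4 assemblies there are (4 * 3) / 2 = 6 off-diagonal cells to visit,
# and the progress bar ticks twice per cell (once for the amino acid alignments and
# once for the nucleotide alignments), so Total = 12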
if (Verbose) {
cat("Collecting pairs.\n")
}
}
# for testing:
# NT_Anchors <- AA_Anchors <- vector(mode = "list",
# length = length(PH))
Count <- 1L
PBCount <- 0L
# upper key!
# QueryGene == 1
# SubjectGene == 2
# ExactOverlap == 3
# QueryIndex == 4
# SubjectIndex == 5
# QLeft == 6
# QRight == 7
# SLeft == 8
# SRight == 9
# MaxKmer == 10
# TotalKmer == 11
# lower key!
# same minus max and total, but for individual linking kmers
# not all linking kmers
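# purely as an illustration, a single upper-triangle row (whole-genome nucleotide
# coordinates) might read:
# QueryGene SubjectGene ExactOverlap QueryIndex SubjectIndex QLeftPos QRightPos SLeftPos SRightPos MaxKmerSize TotalKmerHits
#        12          15           42          1            1     4501      4665     9820     10001          18             5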
block_uid <- 1L
Prev_m1 <- 0L
for (m1 in seq_len(Size - 1L)) {
for (m2 in (m1 + 1L):Size) {
# build our data pool first
# regardless of how we align or if we're including search index or not, we need to prepare and collect
# the same basic statistics and look aheads
if (is.null(DataPool[[m1]])) {
# the pool position is empty, pull from the DB
# and generate the AAStructures
DataPool[[m1]]$DNA <- SearchDB(dbFile = dbConn,
tblName = "NTs",
identifier = ObjectIDs[m1],
verbose = FALSE,
nameBy = "description")
DataPool[[m1]]$AA <- SearchDB(dbFile = dbConn,
tblName = "AAs",
identifier = ObjectIDs[m1],
verbose = FALSE,
nameBy = "description")
DataPool[[m1]]$len <- width(DataPool[[m1]]$DNA)
DataPool[[m1]]$mod <- DataPool[[m1]]$len %% 3L == 0
DataPool[[m1]]$code <- GeneCalls[[m1]]$Coding
DataPool[[m1]]$cds <- lengths(GeneCalls[[m1]]$Range)
DataPool[[m1]]$struct <- PredictHEC(myAAStringSet = DataPool[[m1]]$AA,
type = "probabilities",
HEC_MI1 = MAT1,
HEC_MI2 = MAT2)
if (IncludeIndexSearch & !IgnoreDefaultStringSet) {
# return(list(DataPool[[m1]]$AA,
# DataPool[[m1]]$mod,
# DataPool[[m1]]$code))
DataPool[[m1]]$index <- do.call(what = "IndexSeqs",
args = c(list("subject" = DataPool[[m1]]$AA[DataPool[[m1]]$mod[DataPool[[m1]]$code]],
"verbose" = FALSE),
IndexParams))
} else if (IncludeIndexSearch & IgnoreDefaultStringSet) {
DataPool[[m1]]$index <- do.call(what = "IndexSeqs",
args = c(list("subject" = DataPool[[m1]]$DNA,
"verbose" = FALSE),
IndexParams))
}
} else {
# the pool position is not empty, assume that it's populated with all the information
# that it needs
}
if (is.null(DataPool[[m2]])) {
# the pool position is empty, pull from DB
# and generate the AAStructures
DataPool[[m2]]$DNA <- SearchDB(dbFile = dbConn,
tblName = "NTs",
identifier = ObjectIDs[m2],
verbose = FALSE,
nameBy = "description")
DataPool[[m2]]$AA <- SearchDB(dbFile = dbConn,
tblName = "AAs",
identifier = ObjectIDs[m2],
verbose = FALSE,
nameBy = "description")
DataPool[[m2]]$len <- width(DataPool[[m2]]$DNA)
DataPool[[m2]]$mod <- DataPool[[m2]]$len %% 3L == 0
DataPool[[m2]]$code <- GeneCalls[[m2]]$Coding
DataPool[[m2]]$cds <- lengths(GeneCalls[[m2]]$Range)
DataPool[[m2]]$struct <- PredictHEC(myAAStringSet = DataPool[[m2]]$AA,
type = "probabilities",
HEC_MI1 = MAT1,
HEC_MI2 = MAT2)
if (IncludeIndexSearch & !IgnoreDefaultStringSet) {
DataPool[[m2]]$index <- do.call(what = "IndexSeqs",
args = c(list("subject" = DataPool[[m2]]$AA[DataPool[[m2]]$mod[DataPool[[m2]]$code]],
"verbose" = FALSE),
IndexParams))
} else if (IncludeIndexSearch & IgnoreDefaultStringSet) {
DataPool[[m2]]$index <- do.call(what = "IndexSeqs",
args = c(list("subject" = DataPool[[m2]]$DNA,
"verbose" = FALSE),
IndexParams))
}
} else {
# the pool position is not empty, assume that it's populated with all the information
# that it needs
}
if (Prev_m1 != m1) {
QueryDNA <- DataPool[[m1]]$DNA
QueryAA <- DataPool[[m1]]$AA
QNTCount <- DataPool[[m1]]$len
QMod <- DataPool[[m1]]$mod
QCode <- DataPool[[m1]]$code
QCDSCount <- DataPool[[m1]]$cds
QueryStruct <- DataPool[[m1]]$struct
QueryIndex <- DataPool[[m1]]$index
} else {
# Prev_m1 == m1: the query objects extracted on the previous iteration are still valid
}
SubjectDNA <- DataPool[[m2]]$DNA
SubjectAA <- DataPool[[m2]]$AA
SNTCount <- DataPool[[m2]]$len
SMod <- DataPool[[m2]]$mod
SCode <- DataPool[[m2]]$code
SCDSCount <- DataPool[[m2]]$cds
SubjectStruct <- DataPool[[m2]]$struct
SubjectIndex <- DataPool[[m2]]$index
if (IncludeIndexSearch) {
# step 1: build indexes into the data pool if they don't exist already
# # this is already accomplished
# step 2: run the searches
if (IgnoreDefaultStringSet) {
search_df1 <- do.call(what = "SearchIndex",
args = c(list("pattern" = QueryDNA,
"invertedIndex" = SubjectIndex,
"verbose" = FALSE,
"processors" = Processors),
SearchParams))
search_df2 <- do.call(what = "SearchIndex",
args = c(list("pattern" = SubjectDNA,
"invertedIndex" = QueryIndex,
"verbose" = FALSE,
"processors" = Processors),
SearchParams))
} else {
search_df1 <- do.call(what = "SearchIndex",
args = c(list("pattern" = QueryAA[QMod[QCode]],
"invertedIndex" = SubjectIndex,
"verbose" = FALSE,
"processors" = Processors),
SearchParams))
search_df2 <- do.call(what = "SearchIndex",
args = c(list("pattern" = SubjectAA[SMod[SCode]],
"invertedIndex" = QueryIndex,
"verbose" = FALSE,
"processors" = Processors),
SearchParams))
# return(list("seq" = QueryAA,
# "code" = QCode,
# "mod" = QMod))
#direction 1 is pattern -> subject
#direction 2 is subject -> pattern
search_df1 <- search_df1[order(search_df1$Pattern,
search_df1$Subject), ]
search_df2 <- search_df2[order(search_df2$Subject,
search_df2$Pattern), ]
# return(list(search_df1,
# search_df2))
search_i1 <- paste(search_df1$Pattern,
search_df1$Subject,
sep = "_")
search_i2 <- paste(search_df2$Subject,
search_df2$Pattern,
sep = "_")
search_pairs <- data.frame("p1" = names(QueryAA[QMod[QCode]])[search_df1$Pattern[search_i1 %in% search_i2]],
"p2" = names(SubjectAA[SMod[SCode]])[search_df1$Subject[search_i1 %in% search_i2]])
place_holder1 <- do.call(rbind,
strsplit(x = search_pairs$p1,
split = "_",
fixed = TRUE))
search_pairs$i1 <- as.integer(place_holder1[, 2L])
search_pairs$f1 <- as.integer(place_holder1[, 3L])
place_holder2 <- do.call(rbind,
strsplit(x = search_pairs$p2,
split = "_",
fixed = TRUE))
search_pairs$i2 <- as.integer(place_holder2[, 2L])
search_pairs$f2 <- as.integer(place_holder2[, 3L])
search_pairs$f_hits <- search_df1$Position[search_i1 %in% search_i2]
search_pairs$s1 <- GeneCalls[[m1]]$Strand[search_pairs$f1]
search_pairs$s2 <- GeneCalls[[m2]]$Strand[search_pairs$f2]
search_pairs$start1 <- GeneCalls[[m1]]$Start[search_pairs$f1]
search_pairs$start2 <- GeneCalls[[m2]]$Start[search_pairs$f2]
search_pairs$stop1 <- GeneCalls[[m1]]$Stop[search_pairs$f1]
search_pairs$stop2 <- GeneCalls[[m2]]$Stop[search_pairs$f2]
# slam everything together -- i.e. build out rows that need to be
# added to the linked pairs object
hit_adjust_start <- do.call(cbind,
search_pairs$f_hits)
hit_key <- vapply(X = search_pairs$f_hits,
FUN = function(x) {
ncol(x)
},
FUN.VALUE = vector(mode = "integer",
length = 1L))
hit_q_partner <- rep(search_pairs$f1,
times = hit_key)
hit_s_partner <- rep(search_pairs$f2,
times = hit_key)
# should be a list in the same shape as the SearchIndex Positions
# but with the hits mapped to (mostly) the right spots
# !!! IMPORTANT !!! This is limited to sequences without introns
# for this to work correctly with introns, I need to be able to divvy
# these offsets up across CDSs, which, though possible now, will need
# some significant infrastructure changes to make work cleanly
# return(list("a" = search_pairs$f_hits,
# "d" = search_pairs$s1,
# "e" = search_pairs$s2,
# "f" = search_pairs$start1,
# "g" = search_pairs$start2,
# "h" = search_pairs$stop1,
# "i" = search_pairs$stop2))
hit_arrangement <- mapply(SIMPLIFY = FALSE,
FUN = function(a, d, e, f, g, h, i) {
# 0 == FALSE
# 1 == TRUE
# strand is 0 or 1, 1 == negative strand
if (d) {
# negative strand, flip, offset, then assign
# feature starts at 100
# 1 == 100
# 2 == 97
# 3 == 94
# etc
# right - (x - 1) * 3 = position
h1 <- h - (a[c(1, 2), , drop = FALSE] - 1) * 3
h1 <- apply(X = h1,
MARGIN = 2,
FUN = function(x) {
sort(x)
},
simplify = TRUE)
} else {
# positive strand, just offset and assign
# feature starts at 10,
# 1 == 10
# 2 == 13
# 3 == 16
# etc
# left + (x - 1) * 3 = position
h1 <- f + (a[c(1, 2), , drop = FALSE] - 1) * 3
}
# repeat for feature 2
if (e) {
h2 <- i - (a[c(3,4), , drop = FALSE] - 1) * 3
h2 <- apply(X = h2,
MARGIN = 2,
FUN = function(x) {
sort(x)
},
simplify = TRUE)
} else {
h2 <- g + (a[c(3,4), , drop = FALSE] - 1) * 3
}
return(rbind(h1, h2))
},
a = search_pairs$f_hits,
d = search_pairs$s1,
e = search_pairs$s2,
f = search_pairs$start1,
g = search_pairs$start2,
h = search_pairs$stop1,
i = search_pairs$stop2)
hit_rearrangement <- t(do.call(cbind,
hit_arrangement))
block_bounds <- lapply(X = hit_arrangement,
FUN = function(x) {
c(min(x[1, ]),
max(x[2, ]),
min(x[3, ]),
max(x[4, ]))
})
block_bounds <- do.call(rbind,
block_bounds)
hit_widths <- lapply(X = search_pairs$f_hits,
FUN = function(x) {
x[2, ] - x[1, ] + 1L
})
hit_totals <- vapply(X = hit_widths,
FUN = function(x) {
sum(x)
},
FUN.VALUE = vector(mode = "integer",
length = 1))
hit_max <- vapply(X = hit_widths,
FUN = function(x) {
max(x)
},
FUN.VALUE = vector(mode = "integer",
length = 1))
# return(list("QueryGene" = hit_q_partner,
# "SubjectGene" = hit_s_partner,
# "ExactOverlap" = unlist(hit_widths),
# "QueryIndex" = rep(search_pairs$i1,
# times = hit_key),
# "SubjectIndex" = rep(search_pairs$i2,
# times = hit_key),
# "QLeftPos" = hit_rearrangement[, 1],
# "QRightPos" = hit_rearrangement[, 2],
# "SLeftPos" = hit_rearrangement[, 3],
# "SRightPos" = hit_rearrangement[, 4]))
add_by_hit <- data.frame("QueryGene" = hit_q_partner,
"SubjectGene" = hit_s_partner,
"ExactOverlap" = unlist(hit_widths),
"QueryIndex" = rep(search_pairs$i1,
times = hit_key),
"SubjectIndex" = rep(search_pairs$i2,
times = hit_key),
"QLeftPos" = hit_rearrangement[, 1],
"QRightPos" = hit_rearrangement[, 2],
"SLeftPos" = hit_rearrangement[, 3],
"SRightPos" = hit_rearrangement[, 4])
add_by_block <- data.frame("QueryGene" = search_pairs$f1,
"SubjectGene" = search_pairs$f2,
"ExactOverlap" = hit_totals,
"QueryIndex" = search_pairs$i1,
"SubjectIndex" = search_pairs$i2,
"QLeftPos" = block_bounds[, 1],
"QRightPos" = block_bounds[, 2],
"SLeftPos" = block_bounds[, 3],
"SRightPos" = block_bounds[, 4],
"MaxKmerSize" = hit_max,
"TotalKmerHits" = hit_key)
# using forward hits from here:
# append pairs that don't appear in the current linked pairs object
# onto the current linked pairs object
}
# hits are now transposed into the context of the whole sequence
# as opposed to the feature
# build out rows to add, and then add them
if (nrow(SynExtendObject[[m1, m2]]) > 0) {
select_row1 <- paste(SynExtendObject[[m1, m2]][, 1L],
SynExtendObject[[m1, m2]][, 4L],
SynExtendObject[[m1, m2]][, 2L],
SynExtendObject[[m1, m2]][, 5L],
sep = "_")
select_row2 <- paste(SynExtendObject[[m2, m1]][, 1L],
SynExtendObject[[m2, m1]][, 4L],
SynExtendObject[[m2, m1]][, 2L],
SynExtendObject[[m2, m1]][, 5L],
sep = "_")
} else {
select_row1 <- select_row2 <- vector(mode = "character",
length = 0)
}
# upper diagonal is blocks,
# lower diagonal is hits
select_row3 <- paste(add_by_block$QueryGene,
add_by_block$QueryIndex,
add_by_block$SubjectGene,
add_by_block$SubjectIndex,
sep = "_")
select_row4 <- paste(add_by_hit$QueryGene,
add_by_hit$QueryIndex,
add_by_hit$SubjectGene,
add_by_hit$SubjectIndex,
sep = "_")
sr_upper <- !(select_row3 %in% select_row1)
sr_lower <- !(select_row4 %in% select_row2)
# return(list(select_row1,
# select_row2,
# select_row3,
# select_row4,
# SynExtendObject[[m1, m2]],
# SynExtendObject[[m2, m1]],
# add_by_hit,
# add_by_block))
if (any(sr_upper)) {
SynExtendObject[[m1, m2]] <- rbind(SynExtendObject[[m1, m2]],
as.matrix(add_by_block[sr_upper, ]))
SynExtendObject[[m2, m1]] <- rbind(SynExtendObject[[m2, m1]],
as.matrix(add_by_hit[sr_lower, ]))
}
# step 3: morph the searches into the SynExtendObject so other things
# go smoothly
# alignments happen later and profile vs pairs is chosen by the user
} # end include index search logical
# return(list(add_by_hit,
# add_by_block))
###### -- only evaluate valid positions ---------------------------------
if (nrow(SynExtendObject[[m1, m2]]) > 0L) {
# links table is populated, do whatever
PMatrix <- cbind(SynExtendObject[[m1, m2]][, 1L],
SynExtendObject[[m1, m2]][, 2L])
IMatrix <- cbind(SynExtendObject[[m1, m2]][, 4L],
SynExtendObject[[m1, m2]][, 5L])
# find the Consensus of each linking kmer hit
# this reference to GeneCalls is still fine
p1l <- GeneCalls[[m1]]$Stop[SynExtendObject[[m2, m1]][, 1L]] - GeneCalls[[m1]]$Start[SynExtendObject[[m2, m1]][, 1L]] + 1L
p2l <- GeneCalls[[m2]]$Stop[SynExtendObject[[m2, m1]][, 2L]] - GeneCalls[[m2]]$Start[SynExtendObject[[m2, m1]][, 2L]] + 1L
a1 <- SynExtendObject[[m2, m1]][, 6L]
a2 <- SynExtendObject[[m2, m1]][, 7L]
b1 <- SynExtendObject[[m2, m1]][, 8L]
b2 <- SynExtendObject[[m2, m1]][, 9L]
c1 <- GeneCalls[[m1]]$Start[SynExtendObject[[m2, m1]][, 1L]]
c2 <- GeneCalls[[m1]]$Stop[SynExtendObject[[m2, m1]][, 1L]]
d1 <- GeneCalls[[m2]]$Start[SynExtendObject[[m2, m1]][, 2L]]
d2 <- GeneCalls[[m2]]$Stop[SynExtendObject[[m2, m1]][, 2L]]
s1 <- GeneCalls[[m1]]$Strand[SynExtendObject[[m2, m1]][, 1L]] == 0L
s2 <- GeneCalls[[m2]]$Strand[SynExtendObject[[m2, m1]][, 2L]] == 0L
diff1 <- mapply(function(o, p, q, r, s, t, u, v, w, x, y, z) {
if (o == p) {
mean(c(abs((abs(w - s) / q) - (abs(y - u) / r)),
abs((abs(t - x) / q) - (abs(v - z) / r))))
} else {
mean(c(abs((abs(w - s) / q) - (abs(v - z) / r)),
abs((abs(t - x) / q) - (abs(y - u) / r))))
}
},
o = s1,
p = s2,
q = p1l,
r = p2l,
s = a1,
t = a2,
u = b1,
v = b2,
w = c1,
x = c2,
y = d1,
z = d2)
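# diff1 holds, for each linking k-mer hit, the mean absolute difference between the
# hit's relative position within the query feature and within the subject feature
# (comparing like ends when strands agree and opposite ends when they differ);
# a value near 0 means the hit falls at the same relative spot in both features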
# get the mean consensus
diff2 <- vector(mode = "numeric",
length = nrow(SynExtendObject[[m1, m2]]))
for (m3 in seq_along(diff2)) {
diff2[m3] <- 1 - mean(diff1[SynExtendObject[[m2, m1]][, 1L] == SynExtendObject[[m1, m2]][m3, 1L] &
SynExtendObject[[m2, m1]][, 2L] == SynExtendObject[[m1, m2]][m3, 2L]])
}
# max match size
MatchMax <- SynExtendObject[[m1, m2]][, "MaxKmerSize"]
# total unique matches
UniqueMatches <- SynExtendObject[[m1, m2]][, "TotalKmerHits"]
# total matches at all
TotalMatch <- SynExtendObject[[m1, m2]][, "ExactOverlap"]
# block size determination
if (nrow(SynExtendObject[[m1, m2]]) > 1) {
# only run block size checks if enough rows are present
FeaturesMat <- data.frame("i1" = IMatrix[, 1L],
"f1" = PMatrix[, 1L],
"i2" = IMatrix[, 2L],
"f2" = PMatrix[, 2L])
dr1 <- FeaturesMat[, 2L] + FeaturesMat[, 4L]
dr2 <- FeaturesMat[, 2L] - FeaturesMat[, 4L]
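# dr1 and dr2 place pairs on anti-diagonals and diagonals of the feature-by-feature
# grid: e.g. the colinear run (f1, f2) = (10, 20), (11, 21), (12, 22) shares dr2 = -10,
# while the inverted run (10, 22), (11, 21), (12, 20) shares dr1 = 32, so splitting on
# them collects candidate syntenic runs in either orientation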
InitialBlocks1 <- unname(split(x = FeaturesMat,
f = list(as.integer(FeaturesMat[, 1L]),
as.integer(FeaturesMat[, 3L]),
dr1),
drop = TRUE))
InitialBlocks2 <- unname(split(x = FeaturesMat,
f = list(as.integer(FeaturesMat[, 1L]),
as.integer(FeaturesMat[, 3L]),
dr2),
drop = TRUE))
Blocks <- c(InitialBlocks1[sapply(InitialBlocks1,
function(x) nrow(x),
simplify = TRUE) > 1],
InitialBlocks2[sapply(InitialBlocks2,
function(x) nrow(x),
simplify = TRUE) > 1])
L01 <- length(Blocks)
if (L01 > 0) {
for (m3 in seq_along(Blocks)) {
# blocks are guaranteed to contain more than 1 row
sp1 <- vector(mode = "integer",
length = nrow(Blocks[[m3]]))
# we need to check both columns here, this currently is not correct
# in all cases
sp2 <- Blocks[[m3]][, 4L]
sp3 <- Blocks[[m3]][, 2L]
it1 <- 1L
it2 <- sp2[1L]
it4 <- sp3[1L]
# create a map vector on which to split the groups, if necessary
for (m4 in seq_along(sp1)) {
it3 <- sp2[m4]
it5 <- sp3[m4]
if ((it3 - it2 > 1L) |
(it5 - it4 > 1L)) {
# if predicted pairs are not contiguous, update the iterator
it1 <- it1 + 1L
}
sp1[m4] <- it1
it2 <- it3
it4 <- it5
}
# if the splitting iterator was updated at all, a gap was detected
if (it1 > 1L) {
Blocks[[m3]] <- unname(split(x = Blocks[[m3]],
f = sp1))
} else {
Blocks[[m3]] <- Blocks[m3]
}
} # end m3 loop
# Blocks is now a list where each position is a set of blocked pairs
Blocks <- unlist(Blocks,
recursive = FALSE)
# drop blocks of size 1, they do not need to be evaluated
Blocks <- Blocks[sapply(X = Blocks,
FUN = function(x) {
nrow(x)
},
simplify = TRUE) > 1L]
L01 <- length(Blocks)
AbsBlockSize <- rep(1L,
nrow(FeaturesMat))
BlockID_Map <- rep(-1L,
nrow(FeaturesMat))
# only bother with this if there are blocks remaining
# otherwise AbsBlockSize, which is initialized as a vector of 1s
# will be left as a vector of 1s, all pairs are singleton pairs in this scenario
if (L01 > 0L) {
for (m3 in seq_along(Blocks)) {
# rownames of the Blocks dfs relate to row positions in the original
# matrix
pos <- as.integer(rownames(Blocks[[m3]]))
val <- rep(nrow(Blocks[[m3]]),
nrow(Blocks[[m3]]))
# do not overwrite positions that are in larger blocks
keep <- AbsBlockSize[pos] < val
if (any(keep)) {
AbsBlockSize[pos[keep]] <- val[keep]
BlockID_Map[pos[keep]] <- rep(block_uid,
sum(keep))
block_uid <- block_uid + 1L
}
} # end m3 loop
} # end logical check for block size
} else {
# no blocks observed, all pairs present are singleton pairs
AbsBlockSize <- rep(1L,
nrow(FeaturesMat))
BlockID_Map <- rep(-1L,
nrow(FeaturesMat))
}
} else {
AbsBlockSize <- 1L
BlockID_Map <- -1L
}
# BlockSize evaluation is complete
# we're eventually moving blocksize stuff down to happen post everything else
# from here we need to get the kmer differences
# the PIDs
# the SCOREs
# if both positions are present in the FeatureSeqs object, do nothing
# if either or both is missing, they need to be pulled from the DB
# this functionality needs to be expanded eventually to handle cases where
# we're overlaying something with gene calls on something that doesn't have gene calls
# if (!all(ObjectIDs[c(m1, m2)] %in% FeatureSeqs$IDs)) {
# # an object ID does not have a seqs present, pull them
# # this is not a priority so we're leaving this blank for a second
# } else {
# TMPSeqs01 <- FALSE
# TMPSeqs02 <- FALSE
# }
# align everyone as AAs who can be, i.e. modulo of 3, is coding, etc
# then align everyone else as nucs
# translate the hit locations to the anchor positions
# every hit is an anchor
# all hits are stored in the LinkedPairs object in nucleotide space
# as left/right bounds; orientations depend upon the strandedness of the genes
# prepare the kmer distance stuff:
NucDist <- vector(mode = "numeric",
length = nrow(PMatrix))
if (KmerSize < 10L) {
nuc1 <- oligonucleotideFrequency(x = QueryDNA[PMatrix[, 1L]],
width = KmerSize,
as.prob = TRUE)
nuc2 <- oligonucleotideFrequency(x = SubjectDNA[PMatrix[, 2L]],
width = KmerSize,
as.prob = TRUE)
} else {
stop("non-overlapping kmers not implemented yet")
}
QueryFeatureLength <- QNTCount[PMatrix[, 1L]]
SubjectFeatureLength <- SNTCount[PMatrix[, 2L]]
# Perform the alignments
if (AlignmentFun == "AlignPairs") {
# grab kmer distances ahead of time because AlignPairs doesn't need to loop
# through anything
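# NucDist is the Euclidean distance between the paired features' k-mer frequency
# vectors scaled by their mean total frequency; with as.prob = TRUE the rows sum to
# ~1, so identical profiles give 0 and the maximum (two different single-kmer
# profiles) is sqrt(2)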
for (m3 in seq_along(NucDist)) {
NucDist[m3] <- sqrt(sum((nuc1[m3, ] - nuc2[m3, ])^2)) / ((sum(nuc1[m3, ]) + sum(nuc2[m3, ])) / 2)
}
# we need to check the default stringset logical and the retain anchors logical
if (IgnoreDefaultStringSet) {
# spit out something that makes it clear there's only a single call to
# AlignPairs that is calling NTs
# not much to do here, set the planning df as PMatrix and call it a day
# df_plan <- data.frame("Pattern" = match(x = AASubSet[, 1L],
# table = aa_match1),
# "Subject" = match(x = AASubSet[, 2L],
# table = aa_match2))
df_nt <- data.frame("Pattern" = PMatrix[, 1L],
"Subject" = PMatrix[, 2L])
df_aa <- data.frame("Pattern" = integer(),
"Subject" = integer())
AASelect <- rep(FALSE, nrow(PMatrix))
NTSelect <- rep(TRUE, nrow(PMatrix))
AASubSet <- PMatrix[AASelect, , drop = FALSE]
} else {
# spit out the subset vectors and logicals to correctly call both AlignPairs calls
# and both dfs
AASelect <- QMod[PMatrix[, 1L]] & QCode[PMatrix[, 1L]] & SMod[PMatrix[, 2L]] & SCode[PMatrix[, 2L]]
NTSelect <- !AASelect
AASubSet <- PMatrix[AASelect, , drop = FALSE]
aa_match1 <- strsplit(x = names(QueryAA),
split = "_",
fixed = TRUE)
aa_match1 <- as.integer(sapply(X = aa_match1,
FUN = function(x) {
x[3]
}))
aa_match2 <- strsplit(x = names(SubjectAA),
split = "_",
fixed = TRUE)
aa_match2 <- as.integer(sapply(X = aa_match2,
FUN = function(x) {
x[3]
}))
df_aa <- data.frame("Pattern" = match(x = AASubSet[, 1L],
table = aa_match1),
"Subject" = match(x = AASubSet[, 2L],
table = aa_match2))
df_nt <- data.frame("Pattern" = PMatrix[NTSelect, 1L],
"Subject" = PMatrix[NTSelect, 2L])
}
if (RetainAnchors) {
# set the anchors
# start with nucleotide positions
hitsets <- SynExtendObject[[m2, m1]][, c(1, 2), drop = FALSE]
WithinQueryNucs <- mapply(USE.NAMES = FALSE,
SIMPLIFY = FALSE,
FUN = function(i1,
i2,
left1,
left2,
right1,
right2,
strand1,
strand2,
querygeneboundl,
querygeneboundr,
subgeneboundl,
subgeneboundr) {
# a function
if (strand1 == 0) {
start1 <- left1 - querygeneboundl + 1L
stop1 <- right1 - querygeneboundl + 1L
} else {
start1 <- querygeneboundr - right1 + 1L
stop1 <- querygeneboundr - left1 + 1L
}
if (strand2 == 0) {
start2 <- left2 - subgeneboundl + 1L
stop2 <- right2 - subgeneboundl + 1L
} else {
start2 <- subgeneboundr - right2 + 1L
stop2 <- subgeneboundr - left2 + 1L
}
return(matrix(data = c(start1,
stop1,
start2,
stop2),
ncol = 1L))
},
i1 = hitsets[, 1L],
i2 = hitsets[, 2L],
strand1 = GeneCalls[[IDMatch[m1]]]$Strand[hitsets[, 1L]],
strand2 = GeneCalls[[IDMatch[m2]]]$Strand[hitsets[, 2L]],
querygeneboundl = GeneCalls[[IDMatch[m1]]]$Start[hitsets[, 1L]],
querygeneboundr = GeneCalls[[IDMatch[m1]]]$Stop[hitsets[, 1L]],
subgeneboundl = GeneCalls[[IDMatch[m2]]]$Start[hitsets[, 2L]],
subgeneboundr = GeneCalls[[IDMatch[m2]]]$Stop[hitsets[, 2L]],
left1 = SynExtendObject[[m2, m1]][, "QLeftPos"],
left2 = SynExtendObject[[m2, m1]][, "SLeftPos"],
right1 = SynExtendObject[[m2, m1]][, "QRightPos"],
right2 = SynExtendObject[[m2, m1]][, "SRightPos"])
WithinQueryNucs <- unname(split(x = WithinQueryNucs,
f = rep(x = seq(nrow(PMatrix)),
times = SynExtendObject[[m1, m2]][, "TotalKmerHits"])))
WithinQueryNucs <- lapply(X = WithinQueryNucs,
FUN = function(x) {
do.call(cbind, x)
})
for (m3 in seq_along(WithinQueryNucs)) {
o1 <- order(WithinQueryNucs[[m3]][1L, , drop = FALSE])
o2 <- order(WithinQueryNucs[[m3]][3L, , drop = FALSE])
if (length(o1) > 1L) {
if (o1[1L] > o1[2L]) {
WithinQueryNucs[[m3]][c(1,2), ] <- WithinQueryNucs[[m3]][c(1,2), o1, drop = FALSE]
}
if (o2[1L] > o2[2L]) {
WithinQueryNucs[[m3]][3:4, ] <- WithinQueryNucs[[m3]][3:4, o2, drop = FALSE]
}
}
}
# if there are any AA alignments to do
WithinQueryAAs <- WithinQueryNucs[AASelect]
if (nrow(AASubSet) > 0) {
CurrentQCDSs <- QCDSCount[AASubSet[, 1L]]
CurrentSCDSs <- SCDSCount[AASubSet[, 2L]]
# loop through the nucleotide anchor positions
# drop all anchors in cases where there are more than one CDS
# drop anchors that are out of frame, or that are in NT space
for (m3 in seq_along(WithinQueryAAs)) {
if (CurrentQCDSs[m3] > 1L |
CurrentSCDSs[m3] > 1L) {
# print(m3)
WithinQueryAAs[[m3]] <- integer()
} else {
# check the anchors then drop or adjust
size_check <- apply(X = WithinQueryAAs[[m3]],
MARGIN = 2L,
FUN = function(x) {
x[c(FALSE, TRUE)] - x[c(TRUE, FALSE)] + 1L
}) %% 3L == 0L
frame_check <- apply(X = WithinQueryAAs[[m3]][c(TRUE,FALSE), , drop = FALSE],
MARGIN = 2L,
FUN = function(x) {
((x + 2L) %% 3L) == 0L
})
# size check is a matrix of logicals
# frame check is a matrix of logicals
# drop hits that aren't coding
# and drop hits that aren't in the appropriate frame
WithinQueryAAs[[m3]] <- WithinQueryAAs[[m3]][, (colSums(size_check) == 2L) &
(colSums(frame_check) == 2L),
drop = FALSE]
# we need to offset the start, but not the end
# this can return an integer of length zero when nothing passes
WithinQueryAAs[[m3]] <- matrix(data = as.integer((WithinQueryAAs[[m3]] + c(2L, 0L)) / 3L),
nrow = nrow(WithinQueryAAs[[m3]]),
ncol = ncol(WithinQueryAAs[[m3]]))
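# e.g. an in-frame anchor spanning CDS nucleotides 4..12 becomes amino acid
# positions 2..4: (4 + 2L) / 3 = 2 for the start and (12 + 0L) / 3 = 4 for the end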
# ensure that anchors are in ascending order
}
} # end of m3 loop
} # WithinQueryAAs logical check
# df_aa$Position <- WithinQueryAAs
# ph_obj <- WithinQueryAAs
if (IncludeIndexSearch) {
full_aa_set <- paste(df_aa$Pattern,
df_aa$Subject,
sep = "_")
# this isn't right because these need to be indexed to the
# available coding sequences
aa_w_index <- paste(match(x = search_pairs$f1,
table = aa_match1),
match(x = search_pairs$f2,
table = aa_match2),
sep = "_")
# return(list("a" = full_aa_set,
# "b" = aa_w_index))
WithinQueryAAs[match(x = aa_w_index,
table = full_aa_set)] <- search_pairs$f_hits
}
df_aa$Position <- WithinQueryAAs
df_aa$Position <- mapply(SIMPLIFY = FALSE,
FUN = function(x, y, z) {
if (length(x) > 4) {
cbind(matrix(0L,
4),
x,
matrix(c(y, y, z, z),
4))
} else {
cbind(matrix(0L,
4),
matrix(data = c(y,y,z,z),
4))
}
},
x = df_aa$Position,
y = width(QueryAA[df_aa$Pattern]) + 1L,
z = width(SubjectAA[df_aa$Subject]) + 1L)
df_nt$Position <- WithinQueryNucs[NTSelect]
df_nt$Position <- mapply(SIMPLIFY = FALSE,
FUN = function(x, y, z) {
if (length(x) > 4) {
cbind(matrix(0L,
4),
x,
matrix(c(y, y, z, z),
4))
} else {
cbind(matrix(0L,
4),
matrix(data = c(y,y,z,z),
4))
}
},
x = df_nt$Position,
y = QNTCount[df_nt$Pattern] + 1L,
z = SNTCount[df_nt$Subject] + 1L)
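# each Position entry is now a 4-row anchor matrix (pattern start, pattern end,
# subject start, subject end per column), book-ended by a zero column and a column
# one past each sequence's length, presumably acting as terminal bounds for AlignPairs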
# return(list(WithinQueryAAs,
# WithinQueryNucs,
# QueryAA,
# SubjectAA))
# one last check to reject hits that alignpairs views as overlapping
for (m3 in seq_along(df_aa$Position)) {
check_this_pattern <- check_this_subject <- vector(mode = "integer",
length = ncol(df_aa$Position[[m3]]) * 2L)
check_this_pattern[c(TRUE, FALSE)] <- df_aa$Position[[m3]][1, ]
check_this_pattern[c(FALSE, TRUE)] <- df_aa$Position[[m3]][2, ]
check_this_subject[c(TRUE, FALSE)] <- df_aa$Position[[m3]][3, ]
check_this_subject[c(FALSE, TRUE)] <- df_aa$Position[[m3]][4, ]
while (is.unsorted(check_this_pattern) |
is.unsorted(check_this_subject)) {
# print(m3)
# print(check_this_pattern)
# print(check_this_subject)
# print(df_aa$Position[[m3]])
hit_sizes <- df_aa$Position[[m3]][2, ] - df_aa$Position[[m3]][1, ] + 1L
# drop the smallest hit that is not an anchor
z1 <- which.min(hit_sizes[hit_sizes > 1])
# i'm not sure if this is safe, but this will be refactored to
# occur before anchoring anyway
if (length(z1) < 1) {
# debugging leftovers, retained but disabled:
# print("unexpected condition")
# return(list("pos" = m3,
#             "hits" = df_aa$Position))
stop("an unexpected condition occurred, please contact maintainer")
} else {
df_aa$Position[[m3]] <- df_aa$Position[[m3]][, -(z1 + 1L), drop = FALSE]
}
check_this_pattern <- check_this_subject <- vector(mode = "integer",
length = ncol(df_aa$Position[[m3]]) * 2L)
check_this_pattern[c(TRUE, FALSE)] <- df_aa$Position[[m3]][1, ]
check_this_pattern[c(FALSE, TRUE)] <- df_aa$Position[[m3]][2, ]
check_this_subject[c(TRUE, FALSE)] <- df_aa$Position[[m3]][3, ]
check_this_subject[c(FALSE, TRUE)] <- df_aa$Position[[m3]][4, ]
}
}
# repeat for nt positions
for (m3 in seq_along(df_nt$Position)) {
check_this_pattern <- check_this_subject <- vector(mode = "integer",
length = ncol(df_nt$Position[[m3]]) * 2L)
check_this_pattern[c(TRUE, FALSE)] <- df_nt$Position[[m3]][1, ]
check_this_pattern[c(FALSE, TRUE)] <- df_nt$Position[[m3]][2, ]
check_this_subject[c(TRUE, FALSE)] <- df_nt$Position[[m3]][3, ]
check_this_subject[c(FALSE, TRUE)] <- df_nt$Position[[m3]][4, ]
while (is.unsorted(check_this_pattern) |
is.unsorted(check_this_subject)) {
hit_sizes <- df_nt$Position[[m3]][2, ] - df_nt$Position[[m3]][1, ] + 1L
# drop the smallest hit that is not an anchor
z1 <- which.min(hit_sizes[hit_sizes > 1])
# i'm not sure if this is safe, but this will be refactored to
# occur before anchoring anyway
if (length(z1) < 1) {
stop("an unexpected condition occurred, please contact maintainer")
} else {
df_nt$Position[[m3]] <- df_nt$Position[[m3]][, -(z1 + 1L), drop = FALSE]
}
check_this_pattern <- check_this_subject <- vector(mode = "integer",
length = ncol(df_nt$Position[[m3]]) * 2L)
check_this_pattern[c(TRUE, FALSE)] <- df_nt$Position[[m3]][1, ]
check_this_pattern[c(FALSE, TRUE)] <- df_nt$Position[[m3]][2, ]
check_this_subject[c(TRUE, FALSE)] <- df_nt$Position[[m3]][3, ]
check_this_subject[c(FALSE, TRUE)] <- df_nt$Position[[m3]][4, ]
}
}
} else {
# don't set anchors at all
WithinQueryAAs <- WithinQueryNucs <- list()
}
if (sum(AASelect) > 0) {
# return(list("q" = QueryAA,
# "s" = SubjectAA,
# "df" = df_aa,
# "ph" = ph_obj))
aapairs <- AlignPairs(pattern = QueryAA,
subject = SubjectAA,
pairs = df_aa,
verbose = FALSE,
processors = Processors)
current_aa_pids <- aapairs$Matches / aapairs$AlignmentLength
current_aa_scores <- aapairs$Score / aapairs$AlignmentLength
} else {
current_aa_pids <- numeric()
current_aa_scores <- numeric()
}
if (Verbose) {
PBCount <- PBCount + 1L
setTxtProgressBar(pb = pBar,
value = PBCount / Total)
}
if (sum(NTSelect) > 0) {
ntpairs <- AlignPairs(pattern = QueryDNA,
subject = SubjectDNA,
pairs = df_nt,
verbose = FALSE,
processors = Processors)
current_nt_pids <- ntpairs$Matches / ntpairs$AlignmentLength
current_nt_scores <- ntpairs$Score / ntpairs$AlignmentLength
} else {
current_nt_pids <- numeric()
current_nt_scores <- numeric()
}
if (Verbose) {
PBCount <- PBCount + 1L
setTxtProgressBar(pb = pBar,
value = PBCount / Total)
}
vec1 <- vec2 <- vector(mode = "numeric",
length = nrow(PMatrix))
vec1[AASelect] <- current_aa_pids
vec1[NTSelect] <- current_nt_pids
vec2[AASelect] <- current_aa_scores
vec2[NTSelect] <- current_nt_scores
# For Testing
# AA_Anchors[[Count]] <- WithinQueryAAs
# NT_Anchors[[Count]] <- WithinQueryNucs[NTSelect]
# return(list(aapairs,
# ntpairs))
} else if (AlignmentFun == "AlignProfiles") {
# build out a map of who is being called where
# if we're aligning nucleotides, just call the position in the DNA
# stringsets,
# if not, use the matched positions
ws1 <- match(table = names(QueryAA),
x = names(QueryDNA)[PMatrix[, 1L]])
ws2 <- match(table = names(SubjectAA),
x = names(SubjectDNA)[PMatrix[, 2L]])
vec1 <- vec2 <- vector(mode = "numeric",
length = length(NucDist))
# if we're aligning everything as NTs, just cheat and change the coding
# logical
if (IgnoreDefaultStringSet) {
QCode <- rep(FALSE,
length(QCode))
}
AASelect <- QCode[PMatrix[, 1L]] & QMod[PMatrix[, 1L]] & SCode[PMatrix[, 2L]] & SMod[PMatrix[, 2L]]
# return(list("AASelect" = AASelect,
# "QCode" = QCode[PMatrix[, 1L]],
# "QMod" = QMod[PMatrix[, 1L]],
# "SCode" = SCode[PMatrix[, 2L]],
# "SMod" = SMod[PMatrix[, 2L]],
# "PMatrix" = PMatrix,
# "DataPool" = DataPool,
# "m1" = m1,
# "m2" = m2,
# "ws1" = ws1,
# "ws2" = ws2))
for (m3 in seq_along(NucDist)) {
NucDist[m3] <- sqrt(sum((nuc1[m3, ] - nuc2[m3, ])^2)) / ((sum(nuc1[m3, ]) + sum(nuc2[m3, ])) / 2)
if (AASelect[m3]) {
# align as amino acids
ph1 <- AlignProfiles(pattern = QueryAA[ws1[m3]],
subject = SubjectAA[ws2[m3]],
p.struct = QueryStruct[ws1[m3]],
s.struct = SubjectStruct[ws2[m3]])
ph2 <- DistanceMatrix(myXStringSet = ph1,
includeTerminalGaps = TRUE,
type = "matrix",
verbose = FALSE)
ph3 <- ScoreAlignment(myXStringSet = ph1,
structures = PredictHEC(myAAStringSet = ph1,
type = "probabilities",
HEC_MI1 = MAT1,
HEC_MI2 = MAT2),
structureMatrix = structureMatrix)
} else {
# align as nucleotides
ph1 <- AlignProfiles(pattern = QueryDNA[PMatrix[m3, 1L]],
subject = SubjectDNA[PMatrix[m3, 2L]])
ph2 <- DistanceMatrix(myXStringSet = ph1,
includeTerminalGaps = TRUE,
type = "matrix",
verbose = FALSE)
ph3 <- ScoreAlignment(myXStringSet = ph1,
substitutionMatrix = substitutionMatrix)
}
vec1[m3] <- 1 - ph2[1, 2]
vec2[m3] <- ph3
if (Verbose) {
PBCount <- PBCount + 1L
setTxtProgressBar(pb = pBar,
value = PBCount / Total)
}
} # end m3 loop
} # end if else on alignment function
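# note: in the AlignPairs path PID and Score are normalized by alignment length,
# whereas in the AlignProfiles path PID is 1 minus the terminal-gap-inclusive
# distance and Score is the unnormalized ScoreAlignment value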
PH[[Count]] <- data.frame("p1" = names(QueryDNA)[PMatrix[, 1]],
"p2" = names(SubjectDNA)[PMatrix[, 2]],
"Consensus" = diff2,
"p1featurelength" = QueryFeatureLength,
"p2featurelength" = SubjectFeatureLength,
"blocksize" = AbsBlockSize,
"KDist" = NucDist,
"TotalMatch" = TotalMatch,
"MaxMatch" = MatchMax,
"UniqueMatches" = UniqueMatches,
"PID" = vec1,
"Score" = vec2,
"Alignment" = ifelse(test = AASelect,
yes = "AA",
no = "NT"),
"Block_UID" = BlockID_Map)
} else {
# link table is not populated
PH[[Count]] <- data.frame("p1" = character(),
"p2" = character(),
"Consensus" = numeric(),
"p1featurelength" = integer(),
"p2featurelength" = integer(),
"blocksize" = integer(),
"KDist" = numeric(),
"TotalMatch" = integer(),
"MaxMatch" = integer(),
"UniqueMatches" = integer(),
"PID" = numeric(),
"Score" = numeric(),
"Alignment" = character(),
"Block_UID" = integer())
}
# Count and PBCount are unlinked,
# iterate through both separately and correctly
Count <- Count + 1L
if (object.size(DataPool) > Storage) {
sw1 <- sapply(X = DataPool,
FUN = function(x) {
is.null(x)
})
sw2 <- which(sw1)
if (length(sw2) > 0) {
# drop the first one ... this might realistically need to happen in a while loop, but for now we can live with this
DataPool[[sw2[1L]]] <- NULL
} else {
# I don't know if this case can happen, but we're putting a print statement here just in case
print("Unexpected case occurred while managing storage. Please contact maintainer.")
}
}
Prev_m1 <- m1
} # end m2
} # end m1
res <- do.call(rbind,
PH)
# return(res)
All_UIDs <- unique(res$Block_UID)
res$Block_UID[res$Block_UID == -1L] <- seq(from = max(All_UIDs) + 1L,
by = 1L,
length.out = sum(res$Block_UID == -1L))
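# singleton pairs (Block_UID of -1L) each receive their own fresh id above the
# largest assigned block id, so grouping by Block_UID never lumps unrelated
# singletons together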
attr(x = res,
which = "GeneCalls") <- GeneCalls
class(res) <- c("data.frame",
"PairSummaries")
attr(x = res,
which = "AlignmentFunction") <- AlignmentFun
attr(x = res,
which = "KVal") <- KmerSize
# attr(x = res,
# which = "NT_Anchors") <- NT_Anchors
# close pBar and return res
if (Verbose) {
TimeEnd <- Sys.time()
close(pBar)
print(TimeEnd - TimeStart)
}
return(res)
}
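# A minimal usage sketch; the object names and database path here are illustrative
# assumptions, not part of this file. 'LP' is a LinkedPairs object carrying a
# "GeneCalls" attribute, and the database holds per-assembly "NTs" and "AAs" tables
# with the nucleotide and amino acid feature sequences for every identifier in 'LP'.
# library(SynExtend)
# PS <- SummarizePairs(SynExtendObject = LP,
#                      DataBase01 = "/path/to/decipher.sqlite",
#                      AlignmentFun = "AlignPairs",
#                      Verbose = TRUE)
# class(PS) # c("data.frame", "PairSummaries")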