#' getDistMat
#' calculates distance matrix using parallelDist
#' @param numberCores number of CPU cores to use
#' @param matrix matrix to calculate distance matrix for
#' @return distance matrix
#' examples getDistMat(numberCores, matrix)
getDistMat <- function(numberCores, matrix) {
id <- shiny::showNotification("Calculating distance matrix parallel...", duration = NULL, closeButton = FALSE)
base::on.exit(shiny::removeNotification(id), add = TRUE)
#distMat <- spam::as.spam.dist(
if (length(matrix) > 100000) {
distMat <- stats::as.dist(
parallelDist::parallelDist(
base::as.matrix(matrix),
method = "euclidean",
labels = TRUE,
threads = numberCores
)
)
if (all(is.na(distMat))) {
browser() #this should not happen, if dist(matrix) delivers a result, then something is wrong with parallelDist
}
}
else {
distMat <- stats::dist(base::as.matrix(matrix), method = "euclidean")
}
return(distMat)
}
#' getClustResFast
#' calculates hierarchical clustering using fastcluster
#' @param distanceMatrix distance matrix
#' @return hclust object
#' examples getClustResFast(distanceMatrix)
getClustResFast <- function(distanceMatrix) {
id <- shiny::showNotification("Calculating clustering results...", duration = NULL, closeButton = FALSE)
base::on.exit(shiny::removeNotification(id), add = TRUE)
base::print(base::paste0(sysTimePID(), " start getClustResFast()"))
#check size of distanceMatrix
if (is.valid(distanceMatrix)) {
base::print(base::paste0(sysTimePID(), " clustering trait data."))
#startTime <- Sys.time()
gc()
ClustRes <- fastcluster::hclust(stats::as.dist(distanceMatrix), method = "ward.D2")
}
else {
ClustRes <- NULL
base::message(base::paste0(sysTimePID(), " is.valid(distanceMatrix) == FALSE"))
}
return(ClustRes)
}
#' #' updateTxtClusterOut
#' #' generates summary text after clustering
#' #' @param traitReducedcombinedDFP_Val_Labels data structure with trait reduced results
#' #' @param minP_Val minimum p_value for model to use for clustering
#' #' @param maxP_Val maximum p_value for model to use for clustering
#' #' @param minN minimum n for model to use for clustering
#' #' @param sldNumClasses number of classes to use for clustering
#' #' @return text
#' #' examples updateTxtClusterOut(traitReducedcombinedDFP_Val_Labels, minP_Val, maxP_Val, minN, sldNumClasses)
#' updateTxtClusterOut <- function(traitReducedcombinedDFP_Val_Labels, minP_Val, maxP_Val, minN, sldNumClasses) {
#' base::tryCatch(
#' {
#' result <- NULL
#' if (is.valid(traitReducedcombinedDFP_Val_Labels)) {
#' maxClasses <- length(traitReducedcombinedDFP_Val_Labels$mergedColnames)
#' numRow <- nrow(traitReducedcombinedDFP_Val_Labels$dfP_Val)
#' numCol <- ncol(traitReducedcombinedDFP_Val_Labels$dfP_Val)
#' minClasses <- 1 #dendextend::min_depth(session$userData$sessionVariables$dendTraits)
#' result <- base::paste0("finding trait clusters successful. found minClusters = ",
#' minClasses, "; maxClusters: ", maxClasses, "; Clustering made for numClasses = ", sldNumClasses, ".\n",
#' "size of result df: nrow(CpG)=", numRow, ", ncol(trait)=", numCol, ".")
#' }
#' },
#' error = function(e) {
#' base::message("An error occurred in updateTxtClusterOut():\n", e)
#' },
#' warning = function(w) {
#' base::message("A warning occurred in updateTxtClusterOut():\n", w)
#' },
#' finally = {
#' return(shiny::HTML(result))
#' }
#' )
#' }
#' calculateDistanceNeigboursProbes
#' calculate distance from each probe to its neigbours and gives back data frame with distance metrics
#' @param wd working directory
#' @param clustResProbes data structure with clustering result
#' @param annotation annotation of CpG (names, location etc.)
#' @param distanceToLook maximum distance to look for
#' @param numCores number of cores to use for distance calculation
#' @return data.frame with min, mean, max distance and nuber of CpG in distanceToLook window
#' examples calculateDistanceNeigboursProbes(wd, clustResProbes, annotation, distanceToLook, numCores)
calculateDistanceNeigboursProbes <- function(wd, clustResProbes, annotation, distanceToLook, numCores) {
id <- shiny::showNotification("Calculating distance neighbours probes...", duration = NULL, closeButton = FALSE)
base::on.exit(shiny::removeNotification(id), add = TRUE)
base::tryCatch(
{
base::print(base::paste0(sysTimePID(), " start calculateDistanceNeigboursProbes() with max distance (distanceToLook) = ", distanceToLook, "."))
#get chr and location from annotation
maxDistanceToLook <- distanceToLook
annotation <- subset(annotation, select = c("name", "chromosome", "position"))
#merge annotation
CpG <- data.table::as.data.table(clustResProbes$labels[clustResProbes$order]) #as.data.frame(clustResProbes$labels[clustResProbes$order])
colnames(CpG)[1] <- "label"
#CpG$order <- seq(1:nrow(CpG))
CpG$order <- seq_len(base::nrow(CpG))
distances <- base::merge(CpG, annotation, by.x = "label", by.y = "name")
#DNAdistancesUp <- data.table::as.data.table(seq_along(distances[, 2]), 2) #base::data.frame(seq_along(distances[, 2]), 2)
DNAdistances <- data.table::as.data.table(seq_along(distances[, 2]), 5) #base::data.frame(seq_along(distances[, 2]), 5)
#sort order given by clustering
distances <- distances[base::order(distances$order),]
#library(future) #we have this already in DESCRIPTION file, but without "library(future)" here, it won't work. Strange.
#library(doFuture)
#future::plan(strategy = future::multisession, workers = numCores)
#calculate mean distance to distanceToLook next probes in given order, omit probes with different chr
# foreach::foreach(i = seq_along(distances[, 2]), .combine = rbind, .verbose = TRUE) %dofuture% { #for all objects in distances
foreach::foreach(i = seq_along(distances[, 2])) %do% { #for all objects in distances
#for(i in seq_along(distances[,2])) { #for all objects in distances
#base::source(paste0(wd, "/R/Clustering.R")) #this is necessary for foreach %dopar% to run properly
currentCpG <- distances[i, ]
#cut out defined number of probes (up- and downstream from CpG)
lowerBorder <- i-distanceToLook
if (lowerBorder < 1) {lowerBorder <- 1}#
upperBorder <- i+distanceToLook
if (upperBorder > nrow(distances)) {upperBorder <- nrow(distances)}
distancesToLook <- distances [lowerBorder:upperBorder, ]
#do not iterate over all probes, but exclude those from different chr first
chr <- currentCpG$chr
distancesToLook <- distancesToLook[distancesToLook$chr == chr,]
distanceToLook <- nrow(distancesToLook)
if (distanceToLook > 1) {
DNAdistancesUp <- data.table::as.data.table(seq_along(1:distanceToLook), 2) #base::data.frame(seq_along(1:distanceToLook), 2)
#upstream
foreach::foreach(j = 1:distanceToLook) %do% { #max. distance given by distanceToLook
#for(j in 1:distanceToLook) { #max. distance given by distanceToLook
CpG <- distances[j, ]
if (currentCpG$label != CpG$label) {
DNAdistancesUp[j, 2] <- base::abs(currentCpG$position - CpG$position)
}
else {
DNAdistancesUp[j, 2] <- NA
}
DNAdistancesUp[j, 1] <- currentCpG$label
}
#browser() #check number of nearby CpG
if (is.numeric(DNAdistancesUp[j, 2])) {
DNAdistances[i, 2] <- base::min((DNAdistancesUp[, 2]), na.rm = TRUE)
DNAdistances[i, 3] <- base::mean((DNAdistancesUp[, 2]), na.rm = TRUE)
DNAdistances[i, 4] <- base::max((DNAdistancesUp[, 2]), na.rm = TRUE)
DNAdistances[i, 5] <- base::length(na.omit(DNAdistancesUp[, 2]))
}
else {
DNAdistances[i, 2] <- NA
DNAdistances[i, 3] <- NA
DNAdistances[i, 4] <- NA
DNAdistances[i, 5] <- NA
}
DNAdistances[i, 1] <- currentCpG$label
}
else {
#no near CpG on the same chromosome found
DNAdistances[i, 2] <- NA
DNAdistances[i, 3] <- NA
DNAdistances[i, 4] <- NA
DNAdistances[i, 5] <- NA
}
DNAdistances[i, 1] <- currentCpG$label
}
colnames(DNAdistances) <- c("ID", "minDistance", "meanDistance", "maxDistance", "number")
distances <- na.omit(DNAdistances)
base::message(base::paste0(sysTimePID(), " found n = ", nrow(distances), " neigbouring CpG with distance <=", maxDistanceToLook, ""))
#tbc(): list works, but tibble not
# DNAdistances <- tibble::rownames_to_column(as.data.frame(DNAdistances), var = "rowname")
},
error = function(e) {
base::message("An error occurred in calculateDistanceNeigboursProbes():\n", e)
browser()
},
warning = function(w) {
base::message("A warning occurred in calculateDistanceNeigboursProbes():\n", w)
browser()
},
finally = {
base::print(base::paste0(sysTimePID(), " end calculateDistanceNeigboursProbes()."))
return(DNAdistances)
}
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.