#' Ordering the molecules/reads
#'
#' This function performs the weighted seriation procedure
#' described in the methylscaper manuscript if the method is set to "PCA".
#' The data may also be ordered using a given seriation method from the
#' seriation R package. The weighting is done between a designated start and end
#' base pair chosen by the user, and the weight can be done on the
#' endogenous methylation or the accessibility.
#'
#' @param dataIn A list object containing two elements labelled gch and hcg (already pre-processed.)
#' @param Method Indicates the seriation method to use. The default option
#' is "PCA", which orders the data using a weighted first principal component approach. Any
#' seriation method provided in the \code{seriation} package is also valid input.
#' @param weightStart Index of the first column used in the weighted seriation.
#' @param weightEnd Index of the last column used in the weighted seriation.
#' @param weightFeature Indicates whether to weight the GCH or HCG data.
#' Valid input to weight the GCH is 'gch', 'acc', or 'yellow'. To weight the HCG,
#' valid input for this option is 'hcg', 'met', or 'red'.
#' @param updateProgress A function to handle the progress bar for the
#' Shiny app. Should not be used when using the function independently.
#'
#' @return An object of class \code{orderObject}, which contains the generated
#' ordering ($order1) and a clean data matrix ($toClust)
#' to be passed into the plotting function plotSequence().
#' @importFrom seriation seriate get_order
#' @importFrom stats as.dist dist
#' @importFrom Rfast Dist
#' @export
#' @examples
#'
#' data(singlemolecule_example)
#'
#' orderObj <- initialOrder(singlemolecule_example)
initialOrder <- function(dataIn, Method="PCA", weightStart=NULL,
weightEnd=NULL, weightFeature="red",
updateProgress = NULL){
input_GCH <- dataIn$gch
input_HCG <- dataIn$hcg
## File checks:
if(is.null(updateProgress) & !is.list(dataIn)) {
stop("No valid sites in designated range. Choose another gene or adjust
start and end positions with a larger range")}
if (nrow(input_HCG) != nrow(input_GCH)) {
stop("Input files have different numbers of rows.")}
if (all(rownames(input_GCH) == input_GCH[,1])) input_GCH <- input_GCH[,-1]
if (all(rownames(input_HCG) == input_HCG[,1])) input_HCG <- input_HCG[,-1]
if (is.function(updateProgress)) {
updateProgress(message = "Recoding input data", value = 0.1)}
recoded <- recode(input_GCH, input_HCG)
input_GCH <- recoded$input_GCH
input_HCG <- recoded$input_HCG
weightFeature <- tolower(weightFeature)
if (nrow(dataIn$gch) == 1) {
toClust <- t(as.matrix(c(input_GCH, input_HCG)))
order1 <- 1
orderObject <- list(toClust = toClust, order1 = order1)
}
else {
toClust <- cbind(input_GCH, input_HCG)
weighted = FALSE
# (Optional) Weighting: Adds a variable indicating the number
# of red or yellow patches at specific DNA location
if (!is.null(weightStart) & !is.null(weightEnd)) {
if (is.function(updateProgress)) {
updateProgress(message = "Weighting selected columns", value = 0.2)
}
weighted = TRUE
if (weightStart < 1) weightStart <- 1
if (weightEnd > ncol(input_HCG)) {
print("Setting weightEnd to maximum columns")
weightEnd <- ncol(input_HCG)
}
if (weightFeature == "red" | weightFeature == 'hcg' | weightFeature == 'met') {
FEATURE = 3
weightVector <- apply(input_HCG[,seq(weightStart,weightEnd)],
1, function(x) sum(x==FEATURE))
} else if (weightFeature == "yellow" | weightFeature == 'gch' | weightFeature == 'acc') {
FEATURE = -3
weightVector <- apply(input_GCH[,seq(weightStart,weightEnd)],
1, function(x) sum(x==FEATURE))
} else {
print("weightFeature value is not valid, see ?weightFeature for valid values")
}
weightVector[weightVector == 0] <- 1 # we dont want to have 0 weights
}
if (is.function(updateProgress)) {
updateProgress(message = paste("Ordering with", Method), value = 0.35)}
## PCA should be the default method:
if (Method=="PCA") {
if (weighted) {
w <- weightVector / sum(weightVector)
w_sqrt <- sqrt(w)
toClust_weighted <- diag(w_sqrt) %*% toClust
col_centered <- apply(toClust_weighted, 2, function(x) x - mean(x))
try1 <- svd(col_centered, nu = 1, nv = 0)
order1 <- order(try1$u[,1])
} else {
col_centered <- apply(toClust, 2, function(x) x - mean(x))
try1 <- svd(col_centered, nu = 1, nv = 0)
order1 <- order(try1$u[,1])
}
} else {
if (weighted) {
w <- weightVector / sum(weightVector)
w_sqrt <- sqrt(w)
toClust_weighted <- diag(w_sqrt) %*% toClust
distMat <- as.dist(Dist(toClust_weighted,
method = "euclidean"))
order1 <- seriate(distMat, method=Method)
order1 <- get_order(order1)
} else {
distMat <- as.dist(Dist(toClust,method = "euclidean"))
order1 <- seriate(distMat, method=Method)
order1 <- get_order(order1)
}
}
orderObject <- list(toClust = toClust, order1 = order1)
if (weighted) orderObject$weights <- weightVector
if (Method != "PCA") orderObject$distMat <- distMat
}
if (is.function(updateProgress)) {
updateProgress(message = "Done", value = 1)}
return(orderObject)
}
recode <- function(input_GCH, input_HCG)
{
input_GCH[input_GCH=="."] <- 99
input_GCH <- apply(input_GCH, 2, as.numeric)
input_GCH[input_GCH==2] <- -4
input_GCH[input_GCH==1] <- -3
input_GCH[input_GCH==0] <- -2.5
input_GCH[input_GCH==-1] <- -99
input_GCH[input_GCH==-2] <- -1
input_GCH[input_GCH==-99] <- -2
input_GCH[input_GCH==99] <- 0
input_HCG[input_HCG=="."] <- 99
input_HCG <- apply(input_HCG, 2, as.numeric)
input_HCG[input_HCG==2] <- 4
input_HCG[input_HCG==1] <- 3
input_HCG[input_HCG==0] <- 2.5
input_HCG[input_HCG==-1] <- 2
input_HCG[input_HCG==-2] <- 1
input_HCG[input_HCG==99] <- 0
return(list(input_GCH = data.matrix(input_GCH), input_HCG = data.matrix(input_HCG)))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.