R/DASC.R
In DASC: Detecting hidden batch factors through data adaptive adjustment for biological effects

Documented in adj2vector DASC DASC get_father merge Sptree

require(Biobase)
require(NMF)
require(cvxclustr)

#' Batch factor detection via DASC (Data-adaptive Shrinkage and Clustering-DASC)
#'
#' @param edata the normalized target matrix, a data.frame The row is gene,
#' the column is sample
#' @param pdata Phenotypic data summarizes information about the samples
#' @param factor A factor vector which controls the convex clustering
#' @param method Algorithm to use: 'admm' or 'ama'
#' @param type An integer indicating the norm used: 1 = 1-norm 2 = 2-norm
#' 3 = 2-norm^2
#' @param lambda A double number A regularization parameter in the convex
#' optimization
#' @param rank integer sequence
#' @param nrun the iteration numbers of Semi-NMF
#' @param spanning parameter is assigned as false
#' @param annotation An annotation of the dataset
#' @return outputs the result of semi-NMF. It classifies each sample to its
#' batch factor.
#' @details
#' The \code{DASC} function is the main function of our algorithm DASC
#' (Data-adaptive Shrinkage and Clustering-DASC) package. The DASC includes 
#' two main steps
#' \itemize{ \item Data-adaptive shrinkage using convex clustering shrinkage
#' (Implemented by convex optimization.);
#' \item Extract batch factors using matrix factorization.
#' }
#'
#' @import Biobase
#' @import cvxclustr
#' @import NMF
#'
#' @export
#' @examples
#' library(NMF)
#' library(cvxclustr)
#' library(Biobase)
#' dat <- data.frame(matrix(rnbinom(n=200, mu=100, size=1/0.5), ncol=4))
#' pdat <- data.frame(sample = colnames(dat), type = c(rep('A',2), rep('B',2)))
#' rownames(pdat) <- colnames(dat)
#' res <- DASC(edata=dat, pdata=pdat, factor=pdat$type, method='ama', type=3,
#' lambda = 1, rank = 2, nrun = 50, spanning = FALSE,
#' annotation='simulated dataset')
#'
#' @author Haidong Yi, Ayush T. Raman
#'
#' @seealso \code{\link[cvxclustr]{cvxclust_path_ama}} and
#' \code{\link[cvxclustr]{cvxclust_path_admm}} for the detailed algorithm
#'

DASC <- function(edata, pdata, factor, method="ama", type=3, lambda, rank,
                    nrun, spanning=FALSE, annotation){
    if (!is.null(type) && !(type %in% c(1, 2, 3))) {
        stop("type must be '1', '2', '3', or NULL")
    }
    if (!is.null(method) && !(method %in% c("ama", "admm"))) {
        stop("method must be 'ama', 'admm', or NULL")
    }
    edata <- as.matrix(edata)
    Zero <- apply(edata, 1, sd)
    Zero.num <- which(Zero < 0.001)
    if (length(Zero.num) > 0) {
        names(Zero.num) <- NULL
        edata <- edata[-Zero.num, ]
    }
    edata <- log(1 + edata)
    if (type == 3) {
        Laplace <- trans_Laplace(as.factor(factor))
        Udata <- edata %*% solve(diag(ncol(edata)) + lambda * Laplace)
        Udata <- as.matrix(Udata)
        Bdata <- edata - Udata
    } else {
        ADJ <- trans_ADJ(as.factor(factor))
        if (spanning) {
            ADJ <- Sptree(ADJ)
        }
        w <- adj2vector(ADJ, nrow(ADJ))
        sol <- cvxclust(edata, w, lambda, method = method, type = type)
        Bdata <- edata - sol$U[[1]]
    }
    Zero <- apply(Bdata, 1, sd)
    Zero.num <- which(Zero < 0.001)
    if (length(Zero.num) > 0) {
        names(Zero.num) <- NULL
        Bdata <- Bdata[-Zero.num, ]
    }
    metadata <- data.frame(labelDescription = names(pdata),
                            row.names = names(pdata))
    pdata <- new("AnnotatedDataFrame", data = pdata, varMetadata = metadata)
    data_set <- ExpressionSet(assayData = edata, phenoData = pdata,
                                annotation = annotation)
    data.nmf <- nmf(data_set, rank, Semi_NMF, nrun = nrun, .opt = "v",
                        objective = Loss_Fro, seed = Ini_SemiNMF, mixed = TRUE)
    return(data.nmf)
}

#' Transform the adjacency matrix to a vector
#'
#' @param Adjacency the adjacency matrix of factor
#' @param n number of samples
#' @return \code{w} the vector of the adjacency matrix
#' @author Haidong Yi, Ayush T. Raman
#'
#' @export
#' @examples
#' W <- matrix(c(0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0),nrow=4)
#' w <- adj2vector(W,4)
#'

adj2vector <- function(Adjacency, n) {
    w <- double(n * (n - 1) / 2)
    iter <- 1
    for (i in 1:(n - 1)) {
        for (j in (i + 1):n) {
            w[iter] <- Adjacency[i, j]
            iter <- iter + 1
        }
    }
    return(w)
}

#' Representing node in this subtype
#'
#' @param v the index of the node
#' @param X the saved vector with the information of the parent of every node
#' @return \code{r} the parent index of the node
#' @author Haidong Yi, Ayush T. Raman
#'
#' @export
#' @examples
#' nodes <- c(2,3,4,4)
#' get_father(2, nodes)
#'

get_father <- function(v, X) {
    r <- v
    while (X[r] != r) {
        r <- X[r]
    }
    i <- v
    j <- numeric()
    while (i != r) {
        j <- X[i]
        X[i] <- r
        i <- j
    }
    return(r)
}


#' Get Spanning tree from adjacency matrix
#'
#' @param ADJ the adjacency matrix of the factor
#' @return \code{ADJ} the spaning tree of the adjacency matrix
#' @author Haidong Yi, Ayush T. Raman
#'
#' @export
#' @examples
#' W <- matrix(c(0,1,1,1,1,0,1,1,1,1,0,1,1,1,1,0), nrow=4)
#' Sptree(W)
#'

Sptree <- function(ADJ) {
    Rownum <- nrow(ADJ)
    Colnum <- ncol(ADJ)
    father <- numeric()
    for (i in 1:Rownum) {
        father[i] <- i
    }
    for (i in 2:Rownum) {
        for (j in 1:(i - 1)) {
            if (ADJ[i, j] > 0) {
                father <- merge(i, j, father)
            }
        }
    }
    sum <- numeric()
    pre <- numeric()
    j <- 1
    for (i in 1:Rownum) {
        pre[i] <- get_father(i, father)
        if (pre[i] == i) {
            sum[j] <- i
            j <- j + 1
        }
    }
    ADJ <- matrix(0, Rownum, Colnum)
    for (i in 1:Colnum) {
        if (i != pre[i]) {
            ADJ[i, pre[i]] <- 1
            ADJ[pre[i], i] <- 1
        }
    }
    ADJ
}

#' Combine two trees into one
#'
#' @param x the index of the node
#' @param y the index of the node
#' @param X the saved vector with the information of the parent of every node
#' @return \code{X} A updated X vector with updates on father of every node
#' @author Haidong Yi, Ayush T. Raman
#' @details
#'
#' During the traversal of the graph matrix, merge function joins two 
#' disjoint sets into a single subset. It is a union step of Disjoint-set 
#' algorithm by Bernard A. Galler and Michael J. Fischer. For further details, 
#' please refer to: 
#' \url{https://en.wikipedia.org/wiki/Disjoint-set_data_structure}
#'

merge <- function(x, y, X) {
    fx <- get_father(x, X)
    fy <- get_father(y, X)
    if (fx < fy) {
        X[fx] <- fy
    } else {
        X[fy] <- fx
    }
    return(X)
}

Any scripts or data that you put into this service are public.

DASC documentation built on Nov. 17, 2017, 9:36 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

DASC
Detecting hidden batch factors through data adaptive adjustment for biological effects

R/DASC.R
In DASC: Detecting hidden batch factors through data adaptive adjustment for biological effects

Defines functions DASC adj2vector get_father Sptree merge

Documented in adj2vector DASC DASC get_father merge Sptree

Try the DASC package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DASC Detecting hidden batch factors through data adaptive adjustment for biological effects

R/DASC.R In DASC: Detecting hidden batch factors through data adaptive adjustment for biological effects

Defines functions DASC adj2vector get_father Sptree merge

Documented in adj2vector DASC DASC get_father merge Sptree

Try the DASC package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

DASC
Detecting hidden batch factors through data adaptive adjustment for biological effects

R/DASC.R
In DASC: Detecting hidden batch factors through data adaptive adjustment for biological effects