R/discordant.R
In discordant: The Discordant Method: A Novel Approach for Differential Correlation

Documented in createVectors discordantRun fishersTrans splitMADOutlier

# Applied from Lai et al 2007 Bioinformatics.

# Code written by Charlotte Siska
# R functions fisherTrans, createVectors, discordantRun and makeTable
# Email: charlotte.siska@ucdenver.edu

# Code written by Yinglei Lai
# C code and R functions unmap and em.normal.partial.concordant
# E-mail: ylai@gwu.edu

# Code written by Fang Huaying
# R function for SparCC, adapted from https://bitbucket.org/yonatanf/sparcc
# Email: hyfang@pku.edu.cn

################################################################################
# File: SparCC.R
# Aim : SparCC 
#-------------------------------------------------------------------------------
# Author: Fang Huaying (Peking University)
# Email : hyfang@pku.edu.cn
# Date  : 11/12/2014
#-------------------------------------------------------------------------------
# SparCC for counts known
#   function: SparCC.count
#   input:
#          x ------ nxp count data matrix, row is sample, col is variable
#       imax ------ resampling times from posterior distribution. default 20
#       kmax ------ max iteration steps for SparCC. default is 10
#      alpha ------ the threshold for strong correlation. default is 0.1
#       Vmin ------ minimal variance if negative variance appears. default is 1e-4
#   output: a list structure
#      cov.w ------ covariance estimation
#      cor.w ------ correlation estimation
#

SparCC.count <- function(x, imax = 20, kmax = 10, alpha = 0.1, Vmin = 1e-4) {
  # dimension for w (latent variables)
  p <- ncol(x);
  n <- nrow(x);
  # posterior distribution (alpha)
  x <- x + 1;
  # store generate data
  y <- matrix(0, n, p);
  # store covariance/correlation matrix
  cov.w <- cor.w <- matrix(0, p, p);
  indLow <- lower.tri(cov.w, diag = TRUE);
  # store covariance/correlation for several posterior samples
  covs <- cors <- matrix(0, p * (p + 1) / 2, imax);
  for(i in 1:imax) {
    # generate fractions from posterior distribution
    y <- t(apply(x, 1, function(x) rdirichlet(n = 1, alpha = x)));
    # estimate covariance/correlation
    cov_cor <- SparCC.frac(x = y, kmax = kmax, alpha = alpha, Vmin = Vmin);
    # store variance/correlation only low triangle 
    covs[, i] <- cov_cor$cov.w[indLow];
    cors[, i] <- cov_cor$cor.w[indLow];
  }
  # calculate median for several posterior samples
  cov.w[indLow] <- apply(covs, 1, median); 
  cor.w[indLow] <- apply(cors, 1, median);
  #
  cov.w <- cov.w + t(cov.w);
  diag(cov.w) <- diag(cov.w) / 2;
  cor.w <- cor.w + t(cor.w);
  diag(cor.w) <- 1;
  #
  return(list(cov.w = cov.w, cor.w = cor.w));
}

#-------------------------------------------------------------------------------
# SparCC for fractions known
#   function: SparCC.frac
#   input:
#          x ------ nxp fraction data matrix, row is sample, col is variable
#       kmax ------ max iteration steps for SparCC. default is 10
#      alpha ------ the threshold for strong correlation. default is 0.1
#       Vmin ------ minimal variance if negative variance appears. default is 1e-4
#   output: a list structure
#      cov.w ------ covariance estimation
#      cor.w ------ correlation estimation
SparCC.frac <- function(x, kmax = 10, alpha = 0.1, Vmin = 1e-4) {
  # Log transformation
  x <- log(x);
  p <- ncol(x);
  # T0 = var(log(xi/xj)) variation matrix
  TT <- stats::var(x);
  T0 <- diag(TT) + rep(diag(TT), each = p) - 2 * TT;
  # Variance and correlation coefficients for Basic SparCC  
  rowT0 <- rowSums(T0);
  var.w <- (rowT0 - sum(rowT0) / (2 * p - 2))/(p - 2);
  var.w[var.w < Vmin] <- Vmin;
  #cor.w <- (outer(var.w, var.w, "+") - T0 ) / 
  #  sqrt(outer(var.w, var.w, "*")) / 2;
  Is <- sqrt(1/var.w);
  cor.w <- (var.w + rep(var.w, each = p) - T0) * Is * rep(Is, each = p) * 0.5;
  # Truncated correlation in [-1, 1]
  cor.w[cor.w <= - 1] <- - 1; 
  cor.w[cor.w >= 1] <- 1;
  # Left matrix of estimation equation
  Lmat <- diag(rep(p - 2, p)) + 1; 
  # Remove pairs
  rp <- NULL;
  # Left components
  cp <- rep(TRUE, p);
  # Do loops until max iteration or only 3 components left
  k <- 0;  
  while(k < kmax && sum(cp) > 3) {
    # Left T0 = var(log(xi/xj)) after removing pairs
    T02 <- T0;
    # Store current correlation to find the strongest pair
    curr_cor.w <- cor.w;
    # Remove diagonal
    diag(curr_cor.w) <- 0;
    # Remove removed pairs
    if(!is.null(rp)) {
      curr_cor.w[rp] <- 0;
    }
    # Find the strongest pair in vector form
    n_rp <- which.max(abs(curr_cor.w));
    # Remove the pair if geater than alpha
    if(abs(curr_cor.w[n_rp]) >= alpha) {
      # Which pair in matrix form
      t_id <- c(arrayInd(n_rp, .dim = c(p, p)));
      Lmat[t_id, t_id] <- Lmat[t_id, t_id] - 1;
      # Update remove pairs
      n_rp <- c(n_rp, (p + 1) * sum(t_id) - 2 * p - n_rp);
      rp <- c(rp, n_rp);
      # Update T02
      T02[rp] <- 0;
      # Which component left
      cp <- (diag(Lmat) > 0);
      # Update variance and truncated lower by Vmin
      var.w[cp] <- solve(Lmat[cp, cp], rowSums(T02[cp, cp]));
      var.w[var.w <= Vmin] <- Vmin;
      # Update correlation matrix and truncated by [-1, 1]
      #cor.w <- (outer(var.w, var.w, "+") - T0 ) / 
      #  sqrt(outer(var.w, var.w, "*")) / 2;    
      Is <- sqrt(1/var.w);
      cor.w <- (var.w + rep(var.w, each = p) - T0) * 
        Is * rep(Is, each = p) * 0.5;
      # Truncated correlation in [-1, 1]
      cor.w[cor.w <= - 1] <- - 1;
      cor.w[cor.w >= 1] <- 1;
    }
    else {
      break;
    }
    # 
    k <- k + 1;
  }
  # Covariance
  Is <- sqrt(var.w);
  cov.w <- cor.w * Is * rep(Is, each = p);
  #
  return(list(cov.w = cov.w, cor.w = cor.w));
}



#modified from package mclust
unmap <- function(classification){
    n <- length(classification)
    u <- sort(unique(classification))
    labs <- as.character(u)
    k <- length(u)
    z <- matrix(0, n, k)
    for (j in 1:k) z[classification == u[j], j] <- 1
    dimnames(z) <- list(NULL, labs)
    return(z)
}


em.normal.partial.concordant <- function(data, class, tol=0.001, 
    restriction=0, constrain=0, iteration=1000){
    n <- as.integer(dim(data)[1])
    g <- as.integer(nlevels(as.factor(class)))

    yl.outer <- function(k, zx, zy){
        return( c(zx[k,] %o% zy[k,]) )
    }
    yl.diag <- function(k, z){
        return( c(diag(z[k,])) )
    }

    zx <- unmap(class[,1])
    zy <- unmap(class[,2])
    zxy <- sapply(1:dim(zx)[1], yl.outer, zx, zy)

    pi <- double(g*g)
    mu <- double(g)
    sigma <- double(g)
    nu <- double(g)
    tau <- double(g)
    loglik <- double(1)
    convergence <- integer(1)
    results <- .C("em_normal_partial_concordant", as.double(data[,1]), 
        as.double(data[,2]), as.double(t(zxy)), n, pi, mu, sigma, nu, tau, g, 
        loglik, as.double(tol), as.integer(restriction), 
        as.integer(constrain), as.integer(iteration), convergence)
    return(list(model="PCD", convergence=results[[16]], 
        pi=t(array(results[[5]], dim=c(g,g))), mu_sigma=rbind(results[[6]], 
        results[[7]]), nu_tau=rbind(results[[8]], results[[9]]), 
        loglik=results[[11]], class=apply(array(results[[3]], dim=c(n,g*g)),1,
        order,decreasing=TRUE)[1,], z=array(results[[3]], dim=c(n,g*g))))
}

subSampleData <- function(pdata, class, mu, sigma, nu, tau, pi) {
    n <- as.integer(dim(pdata)[1])
    g <- as.integer(nlevels(as.factor(class)))

    yl.outer <- function(k, zx, zy){
        return( c(zx[k,] %o% zy[k,]) )
    }
    yl.diag <- function(k, z){
        return( c(diag(z[k,])) )
    }

    zx <- unmap(class[,1])
    zy <- unmap(class[,2])
    zxy <- sapply(1:dim(zx)[1], yl.outer, zx, zy)

    results <- .C("subsampling", as.double(pdata[,1]), as.double(pdata[,2]), 
        as.double(t(zxy)), n, as.double(pi), as.double(mu), as.double(sigma), 
        as.double(nu), as.double(tau), g)
    return(list(pi=t(array(results[[5]],dim=c(g,g))), 
        mu_sigma=rbind(results[[6]], results[[7]]), nu_tau=rbind(results[[8]],
        results[[9]]), class=apply(array(results[[3]], dim=c(n,g*g)),1,order,
        decreasing=TRUE)[1,], z=array(results[[3]], dim=c(n,g*g))))
} 

fishersTrans <- function(rho) {
    r = (1+rho)/(1-rho)
    z = 0.5*log(r,base = exp(1))
    return(z)
}

getNames <- function(x, y = NULL) {
    if(is.null(y) == FALSE) {
        namesMatrix <- NULL
        for(i in 1:nrow(x)) {
            tempMatrix <- cbind(rep(rownames(x)[i],nrow(y)), rownames(y))
            namesMatrix <- rbind(namesMatrix, tempMatrix)
        }
    }  

    if(is.null(y)) {
        temp <- matrix(NA,nrow = nrow(x), ncol = nrow(x))
        diag <- lower.tri(temp, diag = FALSE)
        temp[diag] <- rep(1, sum(diag == TRUE))

        namesMatrix <- NULL

        for(i in 1:dim(temp)[1]) {
            outputRow <- temp[i,]
            index <- which(is.na(outputRow) == FALSE)
            if(length(index) > 0) {
                tempMatrix <- cbind(rep(rownames(x)[i],length(index)), 
                    rownames(x)[index])
                namesMatrix <- rbind(namesMatrix, tempMatrix)
            }
        }
    }

    vector_names <- apply(namesMatrix, 1, function(k) paste(k[1],"_",k[2],
        sep = ""))
    return(vector_names)
}

checkInputs <- function(x,y,groups = NULL) {
    issue = FALSE
    if(is.null(groups) == FALSE  && unique(unique(groups) != c(1,2))) {
        print("groups vector must consist of 1s and 2s corresponding to first
             and second group.")
        issue = TRUE
    }
    if(mode(x) != "S4" || (is.null(y) == FALSE && mode(y) != "S4")) {
        print("data matrices x and/or y must be type ExpressionSet")
        issue = TRUE
    }
    return(issue)
}

createVectors <- function(x, y = NULL, groups, cor.method = c("spearman")) {
    print(x)
    if(checkInputs(x,y,groups)) {
        stop("Please fix inputs.")
    }

    index1 <- which(groups == 1)
    index2 <- which(groups == 2)

    check <- match(cor.method, c("spearman", "pearson", "bwmc", "sparcc"))
    if(is.na(check)) {
        stop("Please enter spearman, pearson, bwmc or sparcc for correlation 
            metric.")
    }

    x <- exprs(x)

    if(is.null(y) == FALSE) {
        y <- exprs(y)
        data <- rbind(x, y)
        data1 <- data[,index1]
        data2 <- data[,index2]
        featureSize = dim(x)[1]
        if(cor.method == c("spearman") || cor.method == c("pearson")) {
            statMatrix1 <- cor(t(data1), method = cor.method)
            statMatrix2 <- cor(t(data2), method = cor.method)
        }
        if(cor.method == c("bwmc")) {
            statMatrix1 <- biwt.cor(data1)
            statMatrix2 <- biwt.cor(data2)
        }
        if(cor.method == c("sparcc")) {
            if(min(data) < 0) {
                stop("SparCC can only be applied to sequencing data.")
            }
            statMatrix1 <- SparCC.count(t(data1))
            statMatrix2 <- SparCC.count(t(data2))
        }
        statMatrix1 <- statMatrix1[1:featureSize,
            (featureSize + 1):dim(data1)[1]]
        statMatrix2 <- statMatrix2[1:featureSize,
            (featureSize + 1):dim(data1)[1]]
        statVector1 <- as.vector(statMatrix1)
        statVector2 <- as.vector(statMatrix2)
        vector_names <- getNames(x,y)
    }

    if(is.null(y)) {
        data1 <- x[,index1]
        data2 <- x[,index2]
        if(cor.method == c("spearman") || cor.method == c("pearson")) {
            statMatrix1 <- cor(t(data1), method = cor.method)
            statMatrix2 <- cor(t(data2), method = cor.method)
        }   
        if(cor.method == c("bwmc")) {
            statMatrix1 <- biwt.cor(data1)
            statMatrix2 <- biwt.cor(data2)
        }   
        if(cor.method == c("sparcc")) {
            if(min(data) < 0) {
                stop("SparCC can only be applied to sequencing data.")
            }   
        statMatrix1 <- SparCC.count(t(data1))
        statMatrix2 <- SparCC.count(t(data2))
        }   
        statVector1 <- as.vector(statMatrix1)
        statVector2 <- as.vector(statMatrix2)
        diag <- lower.tri(statMatrix1, diag = FALSE)
        statVector1 <- statMatrix1[diag]
        statVector2 <- statMatrix2[diag]
        vector_names <- getNames(x)
    }

    names(statVector1) <- vector_names
    names(statVector2) <- vector_names

    return(list(v1 = statVector1, v2 = statVector2))
}

discordantRun <- function(v1, v2, x, y = NULL, transform = TRUE, 
    subsampling = FALSE, subSize = dim(x)[1], iter = 100, components = 3) {

    if(checkInputs(x,y)) {
        stop("Please fix inputs.")
    }

    if(transform == TRUE) {
        if(range(v1)[1] < -1 || range(v1)[2] > 1 || range(v2)[1] < -1 || 
            range(v2)[2] > 1) {
            stop("correlation vectors have values less than -1 and/or greater 
                than 1.")
        }
        v1 <- fishersTrans(v1)
        v2 <- fishersTrans(v2)
    }

    check <- match(components, c(3,5))
    if(is.na(check)) {
        stop("components must be 3 or 5")
    }

    x <- exprs(x)
    if(is.null(y) == FALSE) { y <- exprs(y) }
    featureSize = dim(x)[1]

    pdata <- cbind(v1, v2)
	
    param1 <- sd(v1)
    param2 <- sd(v2)
	
    if(components == 3) {
        class <- cbind(rep(0, dim(pdata)[1]), rep(0, dim(pdata)[1]))
        class[pdata[,1]>0+param1,1] <- 2
        class[pdata[,1]<0-param1,1] <- 1
        class[pdata[,2]>0+param2,2] <- 2
        class[pdata[,2]<0-param2,2] <- 1
        discordClass <- c(2,3,4,6,7,8)
    }

    if(components == 5) {
        class <- cbind(rep(0, dim(pdata)[1]), rep(0, dim(pdata)[1]))
        class[pdata[,1]>0+param1,1] <- 2
        class[pdata[,1]<0-param1,1] <- 1
        class[pdata[,1]>0+(2*param1),1] <- 4
        class[pdata[,1]<0-(2*param1),1] <- 3
        class[pdata[,2]>0+param2,2] <- 2
        class[pdata[,2]<0-param2,2] <- 1
        class[pdata[,2]>0+(2*param2),2] <- 4
        class[pdata[,2]<0-(2*param2),2] <- 3
        concordClass <- c(1,7,13,19,25)
        discordClass <- setdiff(1:25,concordClass)
    }

    if(subsampling == TRUE) {
        subSize = nrow(x)
        if(is.null(y) == FALSE & nrow(y) < subSize) {
            subSize = nrow(y)
        }
        total_mu <- rep(0,3)
        total_sigma <- rep(0,3)
        total_nu <- rep(0,3)
        total_tau <- rep(0,3)
        total_pi <- rep(0,3)
        for(i in 1:iter) {
        # make sure pairs are independent
            rowIndex <- sample(nrow(x), subSize)
            colIndex <- sample(nrow(y), subSize)
            mat1 <- matrix(v1, nrow = nrow(x), byrow = FALSE)
            mat2 <- matrix(v2, nrow = nrow(x), byrow = FALSE)

            subSampV1 <- sapply(1:subSize, function(x) mat1[rowIndex[x], 
                colIndex[x]])
            subSampV2 <- sapply(1:subSize, function(x) mat2[rowIndex[x], 
                colIndex[x]])

            sub.pdata <- cbind(subSampV1, subSampV2)
            sub.class <- cbind(rep(0, subSize), rep(0, subSize))
            if(components == 3) {
                sub.class[sub.pdata[,1]>0+param1,1] <- 2
                sub.class[sub.pdata[,1]<0-param1,1] <- 1
                sub.class[sub.pdata[,2]>0+param2,2] <- 2
                sub.class[sub.pdata[,2]<0-param2,2] <- 1
            }
            if(components == 5) {
                sub.class[sub.pdata[,1]>0+param1,1] <- 2
                sub.class[sub.pdata[,1]<0-param1,1] <- 1
                sub.class[sub.pdata[,1]>0+(2*param1),1] <- 4
                sub.class[sub.pdata[,1]<0-(2*param1),1] <- 3
                sub.class[pdata[,2]>0+param2,2] <- 2
                sub.class[pdata[,2]<0-param2,2] <- 1
                sub.class[pdata[,2]>0+(2*param2),2] <- 4
                sub.class[pdata[,2]<0-(2*param2),2] <- 3
            }
            pd <- em.normal.partial.concordant(sub.pdata, sub.class, 
                tol=0.001, restriction=0, constrain=c(0,-sd(pdata),sd(pdata)),
                iteration=1000)
            total_mu <- total_mu + pd$mu_sigma[1,]
            total_sigma <- total_sigma + pd$mu_sigma[2,]
            total_nu <- total_nu + pd$nu_tau[1,]
            total_tau <- total_tau + pd$nu_tau[2,]
            total_pi <- total_pi + pd$pi
        }

        mu <- total_mu/iter
        sigma <- total_sigma/iter
        nu <- total_nu/iter
        tau <- total_tau/iter
        pi <- total_pi/iter

        finalResult <- subSampleData(pdata, class, mu, sigma, nu, tau, pi)
        zTable <- finalResult$z
        classVector <- finalResult$class
    } else {
        pd <- em.normal.partial.concordant(pdata, class, tol=0.001, 
            restriction=0, constrain=c(0,-sd(pdata),sd(pdata)), 
            iteration=1000)
        zTable <- pd$z
        classVector <- pd$class
    }

    discordPPV <- apply(zTable, 1, function(x) sum(x[discordClass])/sum(x))

    if(is.null(y) == FALSE) {
        discordPPMatrix <- matrix(discordPPV, nrow = featureSize, 
            byrow = FALSE)
        classMatrix <- matrix(classVector, nrow = featureSize, byrow = FALSE)
        rownames(discordPPMatrix) <- rownames(x)
        colnames(discordPPMatrix) <- rownames(y)
        rownames(classMatrix) <- rownames(x)
        colnames(classMatrix) <- rownames(y)

        vector_names <- getNames(x,y)
        names(discordPPV) <- vector_names
        names(classVector) <- vector_names
    }

    if(is.null(y)) {
        discordPPMatrix <- matrix(NA,nrow = featureSize, ncol = featureSize)
        classMatrix <- discordPPMatrix
        diag <- lower.tri(discordPPMatrix, diag = FALSE)
        discordPPMatrix[diag] <- discordPPV
        classMatrix[diag] <- classVector
        rownames(discordPPMatrix) <- rownames(x)
        colnames(discordPPMatrix) <- rownames(x)
        rownames(classMatrix) <- rownames(x)
        colnames(classMatrix) <- rownames(x)
        vector_names <- getNames(x)
        names(discordPPV) <- vector_names
        names(classVector) <- vector_names
    }

    zTable <- t(apply(zTable, 1, function(x) x/sum(x)))
    rownames(zTable) <- vector_names

    return(list(discordPPMatrix = discordPPMatrix, discordPPVector = 
        discordPPV, classMatrix = classMatrix, classVector = classVector, 
        probMatrix = zTable, loglik = pd$loglik))
}

splitMADOutlier <- function(mat, filter0 = TRUE, threshold = 2) {
    if(mode(mat) != "S4") {
        stop("data matrix mat must be type ExpressionSet")
    }
    mat <- exprs(mat)
    maxMAD <- c()
    mat.filtered <- NULL
    index <- c()
    for(i in 1:nrow(mat)) {
        y <- mat[i,]
        x <- y
        if(length(which(is.infinite(x))) >0 ){
            x <- x[-which(is.infinite(x))]
        }
        if(length(which(is.na(x)))> 0) {
            x <- x[-which(is.na(x))]
        }
        median.x <- median(x)
        left <- x[x <= median.x]
        right <- x[x > median.x]
        left.mad <- mad(left)
        right.mad <- mad(right)
        leftThresh <- median(left) - left.mad*threshold
        rightThresh <- median(right) + right.mad*threshold 
        left.check <- sum(left < leftThresh)
        right.check <- sum(right > rightThresh)
        if(left.check == 0 & right.check == 0) {
            mat.filtered <- rbind(mat.filtered, y)
            rownames(mat.filtered)[nrow(mat.filtered)] <- rownames(mat)[i]
            index <- c(index,i)
        }
    }
    colnames(mat.filtered) = colnames(mat)
    return(list(mat.filtered = mat.filtered, index = index))
}
Any scripts or data that you put into this service are public.
discordant documentation built on Nov. 8, 2020, 4:52 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
discordant
The Discordant Method: A Novel Approach for Differential Correlation

R/discordant.R
In discordant: The Discordant Method: A Novel Approach for Differential Correlation

Defines functions splitMADOutlier discordantRun createVectors checkInputs getNames fishersTrans subSampleData em.normal.partial.concordant unmap SparCC.frac SparCC.count

Documented in createVectors discordantRun fishersTrans splitMADOutlier

Try the discordant package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

discordant The Discordant Method: A Novel Approach for Differential Correlation

R/discordant.R In discordant: The Discordant Method: A Novel Approach for Differential Correlation

Defines functions splitMADOutlier discordantRun createVectors checkInputs getNames fishersTrans subSampleData em.normal.partial.concordant unmap SparCC.frac SparCC.count

Documented in createVectors discordantRun fishersTrans splitMADOutlier

Try the discordant package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

discordant
The Discordant Method: A Novel Approach for Differential Correlation

R/discordant.R
In discordant: The Discordant Method: A Novel Approach for Differential Correlation