R/make_VCV_matrix.R

Defines functions vcalc make_VCV_matrix

Documented in make_VCV_matrix vcalc

#' @title make_VCV_matrix 
#' @description Function for generating simple covariance and correlation matrices based on a clustered variable
#' @param data Dataframe object containing effect sizes, their variance, unique IDs and clustering variable
#' @param matrix Sometimes clustering can get quite complicated. Here you can 'daisy' chain matrices for different levels of clustering to build a combined matrix. Just add in the matrix with an existing cluster and set up a new cluster.
#' @param V Name of the variable (as a string – e.g, "V1") containing effect size variances variances
#' @param m Mean of the control group that is shared. Only used when vcal does not equal "none".
#' @param sd Standard deviation of the control group that is shared. Only used when vcal does not equal "none".
#' @param n Sample size of the control group that is shared.Only used when vcal does not equal "none".
#' @param cluster Name of the variable (as a string – e.g, "V1") indicating which effects belong to the same cluster. Same value of 'cluster' are assumed to be nonindependent (correlated).
#' @param obs Name of the variable (as a string – e.g, "V1") containing individual IDs for each value in the V (Vector of variances). If this parameter is missing, label will be labelled with consecutive integers starting from 1.
#' @param rho Known or assumed correlation value among effect sizes sharing same 'cluster' value. Default value is 0.5.
#' @param type Optional logical parameter indicating whether a full variance-covariance matrix (default or "vcv") is needed or a correlation matrix ("cor") for the non-independent blocks of variance values.
#' @param vcal The calculation of the covariance. Defaults to "none" in which case rho is used. Otherwise, "ROM" (log response ratio) or "LOR" (log odds ratios) can be calculated based on large-sample approximations. 
#' @examples {
#' data(sparrows)
#' # Add sampling variance
#' sparrows$v <- 1 / (sparrows$SampleSize - 3)
#' # Fake grouping
#' sparrows$group1 <- rep(1:2, length.out = nrow(sparrows))
#' # Calculate V based on Place grouping, just for demonstration purposes. 
#' V <- make_VCV_matrix(data = sparrows, V = "v", cluster = "Place", type = "vcv", vcal = "none", rho = 0.5)
#' # Now we an add the second level of clustering
#' V <- make_VCV_matrix(data = sparrows, matrix = V, V = "v", cluster = "group1", type = "vcv", vcal = "none", rho = 0.5)
#' 
#' }
#' @export

make_VCV_matrix <- function(data, matrix = NULL, V, m, sd, n, cluster, obs, type=c("vcv", "cor"), vcal = c("none", "lnOR", "ROM"), rho=0.5){
  
  type <- match.arg(type, choices = type)
  vcal <- match.arg(vcal, choices = vcal)

  if (missing(data)) {
    stop("Must specify dataframe via 'data' argument.")
  }
  if (missing(V)) {
    stop("Must specify name of the variance variable via 'V' argument.")
  }
  if (missing(cluster)) {
    stop("Must specify name of the clustering variable via 'cluster' argument.")
  }
  if (missing(obs)) {
    obs <- 1:length(V)   
  }
  if (missing(type)) {
    type <- "vcv" 
  }
  
  if (vcal != "none") {
    if(missing(m) | missing(n) | missing(sd)){
    stop("Must specify m, sd, and n arguments so that the covariance can be correctly calculated.")}
  }

  if(is.null(matrix)){
    new_matrix <- matrix(0,nrow = dim(data)[1],ncol = dim(data)[1]) #make empty matrix of the same size as data length
    rownames(new_matrix) <- data[ ,obs]
    colnames(new_matrix) <- data[ ,obs]
    } else {
      new_matrix <- matrix
    }

  # find start and end coordinates for the subsets
  tmp <- duplicated(data[ ,cluster])
  shared_coord <- which(data[ ,cluster] %in% data[tmp, cluster]==TRUE)
  # matrix of combinations of coordinates for each experiment with shared control
  combinations <- do.call("rbind", tapply(shared_coord, data[shared_coord,cluster], function(x) t(utils::combn(x,2))))
  
  if(type == "vcv"){
    # calculate covariance values between  values at the positions in shared_list and place them on the matrix
    for (i in 1:dim(combinations)[1]){
      p1 <- combinations[i,1]
      p2 <- combinations[i,2]
      
      if(vcal == "none"){
        p1_p2_cov <- rho * sqrt(data[p1,V]) * sqrt(data[p2,V])
      } else {
        p1_p2_cov <- vcalc(m, sd, n, p1, data, cov_type = vcal)
      }

      new_matrix[p1,p2] <- p1_p2_cov
      new_matrix[p2,p1] <- p1_p2_cov
    }
    diag(new_matrix) <- data[ ,V]   #add the diagonal
  }
  
  if(type == "cor"){
    # calculate covariance values between  values at the positions in shared_list and place them on the matrix
    for (i in 1:dim(combinations)[1]){
      p1 <- combinations[i,1]
      p2 <- combinations[i,2]
      p1_p2_cov <- rho
      new_matrix[p1,p2] <- p1_p2_cov
      new_matrix[p2,p1] <- p1_p2_cov
    }
    diag(new_matrix) <- 1   #add the diagonal of 1
  }
  
  rownames(new_matrix) <- 1:dim(new_matrix)[1]
  colnames(new_matrix) <- 1:dim(new_matrix)[1]

  return(new_matrix)
}


#Value: Labelled full variance-covariance or correlation matrice of the size and labels matching initial dataframe will be returned 

## TO DO: Implement a shared control equations for SMD and lnRR, maybe similar to eho = 0.5, but we have exact calculations so probably worth implementing

#' @title vcalc 
#' @description Function for calculating the covariance based on large-sample approximations for various effect size estimates.
#' @param data Dataframe object containing effect sizes, their variance, unique IDs and clustering variable
#' @param m Mean of the control group that is shared. Only used when vcal does not equal "none".
#' @param sd Standard deviation of the control group that is shared. Only used when vcal does not equal "none".
#' @param n Sample size of the control group that is shared.Only used when vcal does not equal "none".
#' @param p1 The row indicator for the shared control.
#' @param cov_type The type of covariance to be calculated. Currently calculations for log response ratio ("ROM"), and log odds ratio (LOR) are available. 
#' @export
#' 
vcalc <- function(m, sd, n, p1, data, cov_type = c("ROM", "LOR")){
  
  cov_type <- match.arg(cov_type, choices = cov_type)

  if(cov_type == "ROM"){
    # Covariance for shared control when using log response ratio. From Jajeunesse 2011. Ecology
    cov <- data[p1, sd]^2 / (data[p1, n] * data[p1, m]^2)
  }

  if(cov_type == "LOR"){
    #formula for odds ratio (1/x + 1/(m-x))
    cov <- (1/data[p1, m] + 1) / (data[p1, n] - data[p1, m])
  }
  return(cov)
}
daniel1noble/metaAidR documentation built on Oct. 14, 2023, 3:23 p.m.