R/gencor.R

Defines functions gencor

Documented in gencor

#' Generates custom correlation matrices
#'
#' This method generates custom correlation matrices based on user-defined limits and/or proportions.
#'
#' @param d Dimension of the generated matrix. If not informed, \code{d} = 10
#' @param method The method of matrix generation.
#' \itemize{
#'  \item "\code{random}": generates a random matrix with the given dimension;
#'  \item "\code{low}": generates a matrix of values between \code{-lim_low} and   \code{lim_low};
#'  \item "\code{medium}": generates a matrix of values in the interval \cr \code{[-lim\_medium, -lim\_low) U (lim\_low, lim\_medium]};
#'  \item "\code{high}": generates a matrix of values between \code{lim_medium} and 1.
#'  \item "\code{custom}": Generates a matrix given the custom limits and proportions of each band defined by the limits.
#' }
#' @param lim_low The lower limit of generated correlations. Applied in \code{low} and \code{medium} methods by standard and in \code{custom} method if \code{custom_lim} are not informed.
#' @param lim_medium The medium limit of generated correlations. Applied in \code{low} and \code{medium} methods and in \code{custom} method if \code{custom_lim} are not informed.
#' @param custom_lim Number or numeric vector with customized limits to generate the correlation matrix.
#' @param custom_prop Vector with custom proportions for every band defined by \code{lim_low} and \code{lim_medium} or \code{custom_lim}. If not defined, the proportions will be equally distributed among the correlation bands.
#' @param nsim Size of vectors used to generate the correlation matrix.
#' @param signal Defines if the signals of the correlation matrix must be chosen at random or all must be positive.
#'  \itemize{
#'   \item "\code{positive}": generates a correlation matrix with all correlations positive. Some negative signals may occur for correlations sufficiently near         zero.
#'   \item "\code{random}": generates a correlation matrix with random signals
#'  }
#' @param custom_precision The precision used in \code{custom} method. It is the maximum difference between \code{custom_prop} and the proportions generated by the function
#' @param custom_nrep The number of iterations in the optimization method used to generate custom correlation matrices.
#' @param sort_intensity Sorts the correlation matrix by intensity.
#' @param random_liminf Sets the lower limit of uniform distribution that generates the standard deviations used in random correlation matrix generation. Must be greater than zero due to convergence problems.
#' @param seed Enables seed definition.
#' 
#' @return \code{gencor(...)} returns an object of class "gencor" with a list of the following objects:
#' \itemize{
#' 
#' \item Matrix - The generated correlation matrix.
#' \item Method - The method used in generation
#' \item Proportions - The observed proportions at each level. The levels are given by default or user defined.
#' \item Runtime - Ellapsed simulation time
#' \item Nsim - Number of iterations needed to achieve the desired correlation matrix. 0 if the chosen method was "random".
#' \item Precision - The precision used on the  optimization method.
#' \item Dimension - The dimension of the generated correlation matrix.
#' \item Sdev - Vector of standard deviations used in generation process.
#' \item Custom_propp - User defined proportions in custom method. NULL if the chosen method was random.
#' \item custom_lim - User defined correlation limits in custom method. NULL if the chosen method was random.
#' \item Signal - Type of signal generation defined by the user, "random" by default.
#' \item Nrep - Size of simulated data matrix used in correlation matrix generation.
#' \item Generated data - Simulated data used in the generation process.
#' 
#' }
#' 
#' 
#' @details This method generates correlation matrices based on the correlations among normal random variables with mean 0 and specified standard deviation values. These specified standard deviation values make possible the control of the correlation coefficient intensity.
#'
#' @examples
#'
#' ## Generates a random correlation matrix with dimension 10
#' gencor()
#'
#' ## Generates a correlation matrix with correlations below 0.3 
#' gencor(15, method = "low", lim_low = 0.3)
#'
#' ## Generates a correlation matrix with correlations between 0.3 and 0.7
#' gencor(15, method = "medium", lim_low = 0.3, lim_medium = 0.7)
#' 
#' ## Generates a correlation matrix with correlations above 0.7
#' gencor(30, method = "high", lim_medium = 0.75)
#' 
#' ## Generates a custom correlation matrix with: 
#' ## - 30% of values below 0.2, 
#' ## - 30% of values between 0.2 and 0.5,
#' ## - 20% of values between 0.5 and 0.8,
#' ## - 20% of values above 0.8
#' gencor(20, method = "custom", custom_lim = c(0.2, 0.5, 0.8), custom_prop = c(0.3, 0.3, 0.2, 0.2))
#'
#'
#' @importFrom stats runif
#' @importFrom stats rnorm
#' @importFrom stats cor
#' @export

gencor <- function(d = 10,
                   method = c("random", "low", "medium", "high", "custom"),
                   custom_prop = NULL,
                   nsim = 1000,
                   lim_low = .3,
                   lim_medium = .6,
                   custom_lim = NULL,
                   signal = c("random", "positive"),
                   custom_precision = .03,
                   custom_nrep = 1000,
                   sort_intensity = F,
                   random_liminf = 0.01,
                   seed = NULL){


  #Enable seed definition
  if(is.null(seed) == F){
    
    set.seed(seed)
    
  }
  
  #Change the custom_liminf for positive matrix
  if(random_liminf == "positive"){
    random_liminf <- 0.05
  }
  
  #Generate standard deviation values based on expected limits ----
  gensd <- function(lim, n = 1){

    return(runif(n, sqrt((1/lim[2])-1), sqrt((1/(lim[1]))-1)))

  }
 
  #Generate correlation matrix based on defined limits
  gen_cor <- function(x, sdev, custom_signal = "random"){

    #Create the matrix by repeating x as columns
    matx <- matrix(rep(x, length(sdev)), ncol = length(sdev), byrow = F)

    #Sets the signals of correlation matrix

    if(custom_signal == 'random'){

      np <- round((length(sdev) + sqrt(length(sdev)))/2)
      vetsig <- c(rep(1, np), 
                  rep(-1, (length(sdev) - np)))
      k <- sample(vetsig, size = length(sdev), replace = F)
      sig <- do.call("rbind",
                     replicate(nsim, rbind(k), simplify = FALSE))
      #matx <- matx %*% sig
      matx <- matx * sig

    }
    
    if(custom_signal == 'positive'){
      
      
      #sig <- diag(sample(c(-1, 1), length(sdev), replace = T))
      #sig <- matrix(rep(sample(c(-1, 1), length(sdev), replace = T), each = nsim), ncol = length(sdev))
      #k <- sample(c(-1, 1), length(sdev), replace = T)
      vetsig <- rep(1, length(sdev))
      k <- sample(vetsig, size = length(sdev), replace = F)
      sig <- do.call("rbind",
                     replicate(nsim, rbind(k), simplify = FALSE))
      #matx <- matx %*% sig
      matx <- matx * sig
      
      
    }

    # if(custom_signal == 'positive'){
    #
    #   sig <- diag(length(sdev))
    # }



    #Generate normal rv based on standard values that attends the limits and sums to initial normal rv
    y <-  matx + mapply(function(x,y){rnorm(x, y, n = nsim)}, x = 0, y = sdev)

    return(list(matcor = cor(y),
                cordata = y))

  }

  #Computes the correlation proportion in the intensity bands
  prop_cor <- function(x, custom_lim = NULL){

    if(is.null(custom_lim)){

      custom_lim <-  c(lim_low, lim_medium)

    }

    cuts <- c(0, custom_lim, 1)

    x <-  abs(as.vector(x))
    x <- x[-which(x == 1)]

    tab <- table(cut(x, cuts))

    tab/sum(tab)

    table(cut(x, cuts))/sum(table(cut(x, cuts)))

  }


  #Initial runtime
  t1 <- Sys.time()

  #Check the compatibility of arguments defined by the user
  method <- match.arg(method)
  signal <- match.arg(signal)

  #Checks if the condition sqrt(lim_low) < lim_medium
  if((lim_medium) <= sqrt(lim_low)){

    stop("lim_medium must be greater than sqrt(lim_low)")
  }

  #If user informs custom limits or proportions, make method = "custom"
  if(is.null(custom_lim) == F){

    method <- "custom"

  }


  #Divides the (0,1) in equal intervals, take the largest value then
  #apply the powers of 2 based on the number of custom proportions
  #to create custom limits
  if((is.null(custom_prop) == F) & is.null(custom_lim) == T){

    method <- "custom"
    pow <- 2^seq(from = 0, to = (length(custom_prop) - 2), by = 1)
    custom_lim <- sort((1 - 1/(length(custom_prop) - 1)) ^ pow, decreasing = F)


  }

  st_method <- ""

  #Generates the initial normal rv
  x <- rnorm(nsim, 0, 1)

  #Generate the sd vector based on the method choice

  #Low correlations matrix ----
  if(method == "low"){

    sdev <- gensd(c(random_liminf, lim_low), d)

    m <- gen_cor(x, sdev, custom_signal = signal)
    matcor <- m$matcor
    gendata <- m$cordata

    st_method = "Low"

  }

  #Medium correlations matrix ----
  if(method == "medium"){

    sdev <- gensd(c(lim_low, lim_medium), d)

    m <- gen_cor(x, sdev, custom_signal = signal)
    matcor <- m$matcor
    gendata <- m$cordata

    st_method = "Medium"

  }

  #High correlation matrix ----
  if(method == "high"){

    sdev <- gensd(c(lim_medium, .9999), d)

    m <- gen_cor(x, sdev, custom_signal = signal)
    matcor <- m$matcor
    gendata <- m$cordata

    st_method = "High"

  }

  #Random correlation matrix ----
  if(method == "random"){

    sdev <- gensd(c(random_liminf, .9999), d)

    m <- gen_cor(x, sdev, custom_signal = signal)
    matcor <- m$matcor
    gendata <- m$cordata

    st_method = "Random"

  }

  ##Custom correlation matrix ----

  nrep <- 0

  #Store the desired proportions
  custom_prop_or <- custom_prop


  if(method == 'custom'){

    st_method <- ifelse(st_method == "Random", "Random", "Custom")

    repeat{

      #If limits are not informed, will be used 0.3 and 0.6 as limits
      if(is.null(custom_lim)){

        custom_lim <- c(lim_low, lim_medium)

      }

      #If the custom proportions are not informed, they'll be defined as discrete uniform
      if(is.null(custom_prop)){

        custom_prop <- rep(1, length(custom_lim)+1) / (length(custom_lim)+1)
        custom_prop_or <- custom_prop

      }

      if(d < length(custom_prop)){

        message("The number of custom proportions must be less or equal the matrix dimesion")

        break
      }


      #Add 0 and 1 to limits vector
      custom_lim_df <- data.frame(custom_vec_li = c(random_liminf, custom_lim),
                                  custom_vec_ls = c(custom_lim, 0.9999))

      #Generate standard deviation by intensity band
      sdev_df <- apply(custom_lim_df, 1, gensd, n = d)

      #Define the elements number in each correlation group and round up
      prop <- ceiling(custom_prop * d)

      #Remove the columns if there's some band with null proportion
      if(length(which(prop == 0)) > 0) {

        sdev_df <- as.data.frame(sdev_df[,-which(prop == 0)])
        prop <- prop[-which(prop == 0)]

      }

      #Generate the standard deviation vector
      sdev <- NULL
      for(i in 1:ncol(sdev_df)){

        sdev <- c(sdev, sample(sdev_df[,i], size = prop[i], replace = F))

      }



      #Delete at random elements that exceeds the desired matrix size
      if(length(sdev)>d){

        sdev <- sdev[-sample(1:length(sdev), size = length(sdev) - d)]
      }


      #Sorts the standard deviation order
      if(sort_intensity == F){

        sdev <- sdev[sample(1:length(sdev))]
      }

      if(sort_intensity == T){

        sdev <- sort(sdev)
      }


      #Generate the correlation matrix
      m <- gen_cor(x, sdev, custom_signal = signal)
      matcor <- m$matcor
      gendata <- m$cordata



      #Computes the difference between the desired and computed proportions
      dif <- prop_cor(matcor, custom_lim) - custom_prop_or


      #Check the conditions and if they are satisfied, returns the matrix
      if(max(abs(dif)) < custom_precision){

        break

      }


      #Optimization method
      if(nrep < custom_nrep){

        if((max(abs(dif)) > custom_precision) & runif(1) > nrep/custom_nrep){

          custom_prop[which.max(dif)] <- custom_prop[which.max(dif)] - custom_prop[which.max(dif)] * .1
          custom_prop[which.min(dif)] <- custom_prop[which.min(dif)] + custom_prop[which.max(dif)] * .1

          #Corrects roundup issues
          if(sum(custom_prop)!=1){

            custom_prop[which.min(dif)] <- custom_prop[which.min(dif)] + (1 - sum(custom_prop))

          }

          nrep <- nrep + 1

        }

        else{

          nrep <- nrep + 1

        }
      }



      #Stops if n =  nsim
      if(nrep == custom_nrep){

        m <- gen_cor(x, sdev, custom_signal = signal)
        matcor <- m$matcor
        gendata <- m$cordata


        break

      }
    }
  }


  t2 <- Sys.time()
  runtime <- as.numeric(difftime(t2, t1, units = "secs"))

  genmat <- list(Matrix = matcor,
                 Method = st_method,
                 Proportions = prop_cor(matcor, custom_lim),
                 Runtime = runtime,
                 Nsim = nrep,
                 Precision= custom_precision,
                 Dimension = d,
                 SDev = sdev,
                 Custom_prop = custom_prop,
                 Custom_lim = custom_lim,
                 Signal = signal,
                 Nrep = custom_nrep,
                 Sort_intensity = sort_intensity,
                 `Generated data` = gendata)


  class(genmat) <- "gencor"


  return(genmat)

}

#
# print.gencor <- function(obj){
#
#   cat("\nGenerated Matrix\n\n")
#   print(obj$Matrix)
#
# }
#
#
# plot.gencor <- function(obj){
#
#   library(reshape2)
#   library(ggplot2)
#   melted_data <- melt(obj$Matrix)
#
#   ggplot(data = melted_data, aes(x=Var1, y=Var2, fill=value)) +
#     geom_tile() +
#     scale_fill_gradient2(low = "blue", high = "red", mid = "white",
#                          midpoint = 0, limit = c(-1,1), space = "Lab",
#                          name="Pearson\nCorrelation") +
#     xlab("") +
#     ylab("") +
#     theme_minimal() +
#     theme(axis.text.x=element_blank()) +
#     theme(axis.text.y=element_blank())
#
# }
#
#
# summary.gencor <- function(obj){
#
#   cat("\nUsed Method: ", obj$Method,"\n")
#
#   cat("Generated Matrix\n")
#
#   print(obj$Matrix)
#
#   cat("\n\nLimits and Obtained Proportions\n")
#   print(obj$Proportions)
#
#   cat("\nElapsed Time: ", obj$Runtime, " secs\n")
#   cat("Random variables size: ", obj$Nrep)
#
#   if(obj$Method == "Custom"){
#
#     cat("\nCustom Precision of method: ", obj$Precision)
#     cat("\nSimulations to convergence: ", obj$Nsim)
#     cat("\n\n")
#
#   }
#
#
# }
#
# hist.gencor <- function(obj, main = "Histogram of generated correlation matrix", xlab = "Correlations", color = "gray", ...){
#
#   removone <- function(x) return(x[-which(x == 1)])
#
#   hist(removone(obj$Matrix), main = main, xlab = xlab, col = color, ...)
#
# }
# #NOTAS PARA PACOTE
#
# #Checar consistência entre número de faixas e numero de proporções
#
# gencor <- function(d = 10,
#                    method = c("random", "low", "medium", "high", "custom"),
#                    custom_prop = NULL,
#                    nsim = 1000,
#                    lim_low = .3,
#                    lim_medium = .6,
#                    custom_lim = NULL,
#                    signal = c("random", "positive"),
#                    custom_precision = .03,
#                    custom_nrep = 1000,
#                    sort_intensity = F,
#                    random_liminf = 0.01){
#
#
#   #Generate standard deviation values based on expected limits ----
#   gensd <- function(lim = c(li, ls), n = 1){
#
#     return(runif(n, sqrt((1/lim[2])-1), sqrt((1/(lim[1]))-1)))
#
#   }
#
#   #Generate correlation matrix based on defined limits
#   gen_cor <- function(x, sdev, custom_signal = "random"){
#
#     #Create the matrix by repeating x as columns
#     matx <- matrix(rep(x, length(sdev)), nc = length(sdev), byrow = F)
#
#     #Sets the signals of correlation matrix
#
#     if(custom_signal == 'random'){
#
#       sig <- diag(sample(c(-1, 1), length(sdev), replace = T))
#
#     }
#
#     if(custom_signal == 'positive'){
#
#       sig <- diag(length(sdev))
#     }
#
#     matx <- matx %*% sig
#
#     #Generate normal rv based on standard values that attends the limits and sums to initial normal rv
#     y <-  matx + mapply(function(x,y){rnorm(x, y, n = nsim)}, x = 0, y = sdev)
#
#     return(list(matcor = cor(y),
#                 cordata = y))
#
#   }
#
#   #Computes the correlation proportion in the intensity bands
#   prop_cor <- function(x, custom_lim = NULL){
#
#     if(is.null(custom_lim)){
#
#       custom_lim <-  c(lim_low, lim_medium)
#
#     }
#
#     cuts <- c(0, custom_lim, 1)
#
#     x <-  abs(as.vector(x))
#     x <- x[-which(x == 1)]
#
#     tab <- table(cut(x, cuts))
#
#     tab/sum(tab)
#
#     table(cut(x, cuts))/sum(table(cut(x, cuts)))
#
#   }
#
#
#   #Initial runtime
#   t1 <- Sys.time()
#
#   #Check the compatibility of arguments defined by the user
#   method <- match.arg(method)
#   signal <- match.arg(signal)
#
#   #Checks if the condition sqrt(lim_low) < lim_medium
#   if((lim_medium) <= sqrt(lim_low)){
#
#     stop("lim_medium must be greater than sqrt(lim_low)")
#   }
#
#   #If user informs custom limits or proportions, make method = "custom"
#   if(is.null(custom_lim) == F){
#
#     method <- "custom"
#
#   }
#
#
#   #Divides the (0,1) in equal intervals, take the largest value then
#   #apply the powers of 2 based on the number of custom proportions
#   #to create custom limits
#   if((is.null(custom_prop) == F) & is.null(custom_lim) == T){
#
#     method <- "custom"
#     pow <- 2^seq(from = 0, to = (length(custom_prop) - 2), by = 1)
#     custom_lim <- sort((1 - 1/(length(custom_prop) - 1)) ^ pow, decreasing = F)
#
#
#   }
#
#   st_method <- ""
#
#   #Generates the initial normal rv
#   x <- rnorm(nsim, 0, 1)
#
#   #Generate the sd vector based on the method choice
#
#   #Low correlations matrix ----
#   if(method == "low"){
#
#     sdev <- gensd(c(random_liminf, lim_low), d)
#
#     m <- gen_cor(x, sdev, custom_signal = signal)
#     matcor <- m$matcor
#     gendata <- m$cordata
#
#     st_method = "Low"
#
#   }
#
#   #Medium correlations matrix ----
#   if(method == "medium"){
#
#     sdev <- gensd(c(lim_low, lim_medium), d)
#
#     m <- gen_cor(x, sdev, custom_signal = signal)
#     matcor <- m$matcor
#     gendata <- m$cordata
#
#     st_method = "Medium"
#
#   }
#
#   #High correlation matrix ----
#   if(method == "high"){
#
#     sdev <- gensd(c(lim_medium, .9999), d)
#
#     m <- gen_cor(x, sdev, custom_signal = signal)
#     matcor <- m$matcor
#     gendata <- m$cordata
#
#     st_method = "High"
#
#   }
#
#   #Random correlation matrix ----
#   if(method == "random"){
#
#     sdev <- gensd(c(random_liminf, .9999), d)
#
#     m <- gen_cor(x, sdev, custom_signal = signal)
#     matcor <- m$matcor
#     gendata <- m$cordata
#
#     st_method = "Random"
#
#   }
#
#   ##Custom correlation matrix ----
#
#   nrep <- 0
#
#   #Store the desired proportions
#   custom_prop_or <- custom_prop
#
#
#   if(method == 'custom'){
#
#     st_method <- ifelse(st_method == "Random", "Random", "Custom")
#
#     repeat{
#
#       #If limits are not informed, will be used 0.3 and 0.6 as limits
#       if(is.null(custom_lim)){
#
#         custom_lim <- c(lim_low, lim_medium)
#
#       }
#
#       #If the custom proportions are not informed, they'll be defined as discrete uniform
#       if(is.null(custom_prop)){
#
#         custom_prop <- rep(1, length(custom_lim)+1) / (length(custom_lim)+1)
#         custom_prop_or <- custom_prop
#
#       }
#
#       if(d < length(custom_prop)){
#
#         message("The number of custom proportions must be less or equal the matrix dimesion")
#
#         break
#       }
#
#
#       #Add 0 and 1 to limits vector
#       custom_lim_df <- data.frame(custom_vec_li = c(random_liminf, custom_lim),
#                                   custom_vec_ls = c(custom_lim, 0.9999))
#
#       #Generate standard deviation by intensity band
#       sdev_df <- apply(custom_lim_df, 1, gensd, n = d)
#
#       #Define the elements number in each correlation group and round up
#       prop <- ceiling(custom_prop * d)
#
#       #Remove the columns if there's some band with null proportion
#       if(length(which(prop == 0)) > 0) {
#
#         sdev_df <- as.data.frame(sdev_df[,-which(prop == 0)])
#         prop <- prop[-which(prop == 0)]
#
#       }
#
#       #Generate the standard deviation vector
#       sdev <- NULL
#       for(i in 1:ncol(sdev_df)){
#
#         sdev <- c(sdev, sample(sdev_df[,i], size = prop[i], replace = F))
#
#       }
#
#
#
#       #Delete at random elements that exceeds the desired matrix size
#       if(length(sdev)>d){
#
#         sdev <- sdev[-sample(1:length(sdev), size = length(sdev) - d)]
#       }
#
#
#       #Sorts the standard deviation order
#       if(sort_intensity == F){
#
#         sdev <- sdev[sample(1:length(sdev))]
#       }
#
#       if(sort_intensity == T){
#
#         sdev <- sort(sdev)
#       }
#
#
#       #Generate the correlation matrix
#       m <- gen_cor(x, sdev, custom_signal = signal)
#       matcor <- m$matcor
#       gendata <- m$cordata
#
#
#
#       #Computes the difference between the desired and computed proportions
#       dif <- prop_cor(matcor, custom_lim) - custom_prop_or
#
#
#       #Check the conditions and if they are satisfied, returns the matrix
#       if(max(abs(dif)) < custom_precision){
#
#         break
#
#       }
#
#
#       #Optimization method
#       if(nrep < custom_nrep){
#
#         if((max(abs(dif)) > custom_precision) & runif(1) > nrep/custom_nrep){
#
#           custom_prop[which.max(dif)] <- custom_prop[which.max(dif)] - custom_prop[which.max(dif)] * .1
#           custom_prop[which.min(dif)] <- custom_prop[which.min(dif)] + custom_prop[which.max(dif)] * .1
#
#           #Corrects roundup issues
#           if(sum(custom_prop)!=1){
#
#             custom_prop[which.min(dif)] <- custom_prop[which.min(dif)] + (1 - sum(custom_prop))
#
#           }
#
#           nrep <- nrep + 1
#
#         }
#
#         else{
#
#           nrep <- nrep + 1
#
#         }
#       }
#
#
#
#       #Stops if n =  nsim
#       if(nrep == custom_nrep){
#
#         m <- gen_cor(x, sdev, custom_signal = signal)
#         matcor <- m$matcor
#         gendata <- m$cordata
#
#
#         break
#
#       }
#     }
#   }
#
#
#   t2 <- Sys.time()
#   runtime <- as.numeric(difftime(t2, t1, units = "secs"))
#
#   genmat <- list(Matrix = matcor,
#                  Method = st_method,
#                  Proportions = prop_cor(matcor, custom_lim),
#                  Runtime = runtime,
#                  Nsim = nrep,
#                  Precision= custom_precision,
#                  Dimension = d,
#                  SDev = sdev,
#                  Custom_prop = custom_prop,
#                  Custom_lim = custom_lim,
#                  Signal = signal,
#                  Nrep = custom_nrep,
#                  Sort_intensity = sort_intensity)#,
#                  #`Generated data` = gendata)
#
#
#   class(genmat) <- "gencor"
#
#
#   return(genmat)
#
# }
#
#

Try the gencor package in your browser

Any scripts or data that you put into this service are public.

gencor documentation built on Sept. 13, 2022, 5:06 p.m.