Nothing
#' Generates custom correlation matrices
#'
#' This method generates custom correlation matrices based on user-defined limits and/or proportions.
#'
#' @param d Dimension of the generated matrix. If not informed, \code{d} = 10
#' @param method The method of matrix generation.
#' \itemize{
#' \item "\code{random}": generates a random matrix with the given dimension;
#' \item "\code{low}": generates a matrix of values between \code{-lim_low} and \code{lim_low};
#' \item "\code{medium}": generates a matrix of values in the interval \cr \code{[-lim\_medium, -lim\_low) U (lim\_low, lim\_medium]};
#' \item "\code{high}": generates a matrix of values between \code{lim_medium} and 1.
#' \item "\code{custom}": Generates a matrix given the custom limits and proportions of each band defined by the limits.
#' }
#' @param lim_low The lower limit of generated correlations. Applied in \code{low} and \code{medium} methods by standard and in \code{custom} method if \code{custom_lim} are not informed.
#' @param lim_medium The medium limit of generated correlations. Applied in \code{low} and \code{medium} methods and in \code{custom} method if \code{custom_lim} are not informed.
#' @param custom_lim Number or numeric vector with customized limits to generate the correlation matrix.
#' @param custom_prop Vector with custom proportions for every band defined by \code{lim_low} and \code{lim_medium} or \code{custom_lim}. If not defined, the proportions will be equally distributed among the correlation bands.
#' @param nsim Size of vectors used to generate the correlation matrix.
#' @param signal Defines if the signals of the correlation matrix must be chosen at random or all must be positive.
#' \itemize{
#' \item "\code{positive}": generates a correlation matrix with all correlations positive. Some negative signals may occur for correlations sufficiently near zero.
#' \item "\code{random}": generates a correlation matrix with random signals
#' }
#' @param custom_precision The precision used in \code{custom} method. It is the maximum difference between \code{custom_prop} and the proportions generated by the function
#' @param custom_nrep The number of iterations in the optimization method used to generate custom correlation matrices.
#' @param sort_intensity Sorts the correlation matrix by intensity.
#' @param random_liminf Sets the lower limit of uniform distribution that generates the standard deviations used in random correlation matrix generation. Must be greater than zero due to convergence problems.
#' @param seed Enables seed definition.
#'
#' @return \code{gencor(...)} returns an object of class "gencor" with a list of the following objects:
#' \itemize{
#'
#' \item Matrix - The generated correlation matrix.
#' \item Method - The method used in generation
#' \item Proportions - The observed proportions at each level. The levels are given by default or user defined.
#' \item Runtime - Ellapsed simulation time
#' \item Nsim - Number of iterations needed to achieve the desired correlation matrix. 0 if the chosen method was "random".
#' \item Precision - The precision used on the optimization method.
#' \item Dimension - The dimension of the generated correlation matrix.
#' \item Sdev - Vector of standard deviations used in generation process.
#' \item Custom_propp - User defined proportions in custom method. NULL if the chosen method was random.
#' \item custom_lim - User defined correlation limits in custom method. NULL if the chosen method was random.
#' \item Signal - Type of signal generation defined by the user, "random" by default.
#' \item Nrep - Size of simulated data matrix used in correlation matrix generation.
#' \item Generated data - Simulated data used in the generation process.
#'
#' }
#'
#'
#' @details This method generates correlation matrices based on the correlations among normal random variables with mean 0 and specified standard deviation values. These specified standard deviation values make possible the control of the correlation coefficient intensity.
#'
#' @examples
#'
#' ## Generates a random correlation matrix with dimension 10
#' gencor()
#'
#' ## Generates a correlation matrix with correlations below 0.3
#' gencor(15, method = "low", lim_low = 0.3)
#'
#' ## Generates a correlation matrix with correlations between 0.3 and 0.7
#' gencor(15, method = "medium", lim_low = 0.3, lim_medium = 0.7)
#'
#' ## Generates a correlation matrix with correlations above 0.7
#' gencor(30, method = "high", lim_medium = 0.75)
#'
#' ## Generates a custom correlation matrix with:
#' ## - 30% of values below 0.2,
#' ## - 30% of values between 0.2 and 0.5,
#' ## - 20% of values between 0.5 and 0.8,
#' ## - 20% of values above 0.8
#' gencor(20, method = "custom", custom_lim = c(0.2, 0.5, 0.8), custom_prop = c(0.3, 0.3, 0.2, 0.2))
#'
#'
#' @importFrom stats runif
#' @importFrom stats rnorm
#' @importFrom stats cor
#' @export
gencor <- function(d = 10,
method = c("random", "low", "medium", "high", "custom"),
custom_prop = NULL,
nsim = 1000,
lim_low = .3,
lim_medium = .6,
custom_lim = NULL,
signal = c("random", "positive"),
custom_precision = .03,
custom_nrep = 1000,
sort_intensity = F,
random_liminf = 0.01,
seed = NULL){
#Enable seed definition
if(is.null(seed) == F){
set.seed(seed)
}
#Change the custom_liminf for positive matrix
if(random_liminf == "positive"){
random_liminf <- 0.05
}
#Generate standard deviation values based on expected limits ----
gensd <- function(lim, n = 1){
return(runif(n, sqrt((1/lim[2])-1), sqrt((1/(lim[1]))-1)))
}
#Generate correlation matrix based on defined limits
gen_cor <- function(x, sdev, custom_signal = "random"){
#Create the matrix by repeating x as columns
matx <- matrix(rep(x, length(sdev)), ncol = length(sdev), byrow = F)
#Sets the signals of correlation matrix
if(custom_signal == 'random'){
np <- round((length(sdev) + sqrt(length(sdev)))/2)
vetsig <- c(rep(1, np),
rep(-1, (length(sdev) - np)))
k <- sample(vetsig, size = length(sdev), replace = F)
sig <- do.call("rbind",
replicate(nsim, rbind(k), simplify = FALSE))
#matx <- matx %*% sig
matx <- matx * sig
}
if(custom_signal == 'positive'){
#sig <- diag(sample(c(-1, 1), length(sdev), replace = T))
#sig <- matrix(rep(sample(c(-1, 1), length(sdev), replace = T), each = nsim), ncol = length(sdev))
#k <- sample(c(-1, 1), length(sdev), replace = T)
vetsig <- rep(1, length(sdev))
k <- sample(vetsig, size = length(sdev), replace = F)
sig <- do.call("rbind",
replicate(nsim, rbind(k), simplify = FALSE))
#matx <- matx %*% sig
matx <- matx * sig
}
# if(custom_signal == 'positive'){
#
# sig <- diag(length(sdev))
# }
#Generate normal rv based on standard values that attends the limits and sums to initial normal rv
y <- matx + mapply(function(x,y){rnorm(x, y, n = nsim)}, x = 0, y = sdev)
return(list(matcor = cor(y),
cordata = y))
}
#Computes the correlation proportion in the intensity bands
prop_cor <- function(x, custom_lim = NULL){
if(is.null(custom_lim)){
custom_lim <- c(lim_low, lim_medium)
}
cuts <- c(0, custom_lim, 1)
x <- abs(as.vector(x))
x <- x[-which(x == 1)]
tab <- table(cut(x, cuts))
tab/sum(tab)
table(cut(x, cuts))/sum(table(cut(x, cuts)))
}
#Initial runtime
t1 <- Sys.time()
#Check the compatibility of arguments defined by the user
method <- match.arg(method)
signal <- match.arg(signal)
#Checks if the condition sqrt(lim_low) < lim_medium
if((lim_medium) <= sqrt(lim_low)){
stop("lim_medium must be greater than sqrt(lim_low)")
}
#If user informs custom limits or proportions, make method = "custom"
if(is.null(custom_lim) == F){
method <- "custom"
}
#Divides the (0,1) in equal intervals, take the largest value then
#apply the powers of 2 based on the number of custom proportions
#to create custom limits
if((is.null(custom_prop) == F) & is.null(custom_lim) == T){
method <- "custom"
pow <- 2^seq(from = 0, to = (length(custom_prop) - 2), by = 1)
custom_lim <- sort((1 - 1/(length(custom_prop) - 1)) ^ pow, decreasing = F)
}
st_method <- ""
#Generates the initial normal rv
x <- rnorm(nsim, 0, 1)
#Generate the sd vector based on the method choice
#Low correlations matrix ----
if(method == "low"){
sdev <- gensd(c(random_liminf, lim_low), d)
m <- gen_cor(x, sdev, custom_signal = signal)
matcor <- m$matcor
gendata <- m$cordata
st_method = "Low"
}
#Medium correlations matrix ----
if(method == "medium"){
sdev <- gensd(c(lim_low, lim_medium), d)
m <- gen_cor(x, sdev, custom_signal = signal)
matcor <- m$matcor
gendata <- m$cordata
st_method = "Medium"
}
#High correlation matrix ----
if(method == "high"){
sdev <- gensd(c(lim_medium, .9999), d)
m <- gen_cor(x, sdev, custom_signal = signal)
matcor <- m$matcor
gendata <- m$cordata
st_method = "High"
}
#Random correlation matrix ----
if(method == "random"){
sdev <- gensd(c(random_liminf, .9999), d)
m <- gen_cor(x, sdev, custom_signal = signal)
matcor <- m$matcor
gendata <- m$cordata
st_method = "Random"
}
##Custom correlation matrix ----
nrep <- 0
#Store the desired proportions
custom_prop_or <- custom_prop
if(method == 'custom'){
st_method <- ifelse(st_method == "Random", "Random", "Custom")
repeat{
#If limits are not informed, will be used 0.3 and 0.6 as limits
if(is.null(custom_lim)){
custom_lim <- c(lim_low, lim_medium)
}
#If the custom proportions are not informed, they'll be defined as discrete uniform
if(is.null(custom_prop)){
custom_prop <- rep(1, length(custom_lim)+1) / (length(custom_lim)+1)
custom_prop_or <- custom_prop
}
if(d < length(custom_prop)){
message("The number of custom proportions must be less or equal the matrix dimesion")
break
}
#Add 0 and 1 to limits vector
custom_lim_df <- data.frame(custom_vec_li = c(random_liminf, custom_lim),
custom_vec_ls = c(custom_lim, 0.9999))
#Generate standard deviation by intensity band
sdev_df <- apply(custom_lim_df, 1, gensd, n = d)
#Define the elements number in each correlation group and round up
prop <- ceiling(custom_prop * d)
#Remove the columns if there's some band with null proportion
if(length(which(prop == 0)) > 0) {
sdev_df <- as.data.frame(sdev_df[,-which(prop == 0)])
prop <- prop[-which(prop == 0)]
}
#Generate the standard deviation vector
sdev <- NULL
for(i in 1:ncol(sdev_df)){
sdev <- c(sdev, sample(sdev_df[,i], size = prop[i], replace = F))
}
#Delete at random elements that exceeds the desired matrix size
if(length(sdev)>d){
sdev <- sdev[-sample(1:length(sdev), size = length(sdev) - d)]
}
#Sorts the standard deviation order
if(sort_intensity == F){
sdev <- sdev[sample(1:length(sdev))]
}
if(sort_intensity == T){
sdev <- sort(sdev)
}
#Generate the correlation matrix
m <- gen_cor(x, sdev, custom_signal = signal)
matcor <- m$matcor
gendata <- m$cordata
#Computes the difference between the desired and computed proportions
dif <- prop_cor(matcor, custom_lim) - custom_prop_or
#Check the conditions and if they are satisfied, returns the matrix
if(max(abs(dif)) < custom_precision){
break
}
#Optimization method
if(nrep < custom_nrep){
if((max(abs(dif)) > custom_precision) & runif(1) > nrep/custom_nrep){
custom_prop[which.max(dif)] <- custom_prop[which.max(dif)] - custom_prop[which.max(dif)] * .1
custom_prop[which.min(dif)] <- custom_prop[which.min(dif)] + custom_prop[which.max(dif)] * .1
#Corrects roundup issues
if(sum(custom_prop)!=1){
custom_prop[which.min(dif)] <- custom_prop[which.min(dif)] + (1 - sum(custom_prop))
}
nrep <- nrep + 1
}
else{
nrep <- nrep + 1
}
}
#Stops if n = nsim
if(nrep == custom_nrep){
m <- gen_cor(x, sdev, custom_signal = signal)
matcor <- m$matcor
gendata <- m$cordata
break
}
}
}
t2 <- Sys.time()
runtime <- as.numeric(difftime(t2, t1, units = "secs"))
genmat <- list(Matrix = matcor,
Method = st_method,
Proportions = prop_cor(matcor, custom_lim),
Runtime = runtime,
Nsim = nrep,
Precision= custom_precision,
Dimension = d,
SDev = sdev,
Custom_prop = custom_prop,
Custom_lim = custom_lim,
Signal = signal,
Nrep = custom_nrep,
Sort_intensity = sort_intensity,
`Generated data` = gendata)
class(genmat) <- "gencor"
return(genmat)
}
#
# print.gencor <- function(obj){
#
# cat("\nGenerated Matrix\n\n")
# print(obj$Matrix)
#
# }
#
#
# plot.gencor <- function(obj){
#
# library(reshape2)
# library(ggplot2)
# melted_data <- melt(obj$Matrix)
#
# ggplot(data = melted_data, aes(x=Var1, y=Var2, fill=value)) +
# geom_tile() +
# scale_fill_gradient2(low = "blue", high = "red", mid = "white",
# midpoint = 0, limit = c(-1,1), space = "Lab",
# name="Pearson\nCorrelation") +
# xlab("") +
# ylab("") +
# theme_minimal() +
# theme(axis.text.x=element_blank()) +
# theme(axis.text.y=element_blank())
#
# }
#
#
# summary.gencor <- function(obj){
#
# cat("\nUsed Method: ", obj$Method,"\n")
#
# cat("Generated Matrix\n")
#
# print(obj$Matrix)
#
# cat("\n\nLimits and Obtained Proportions\n")
# print(obj$Proportions)
#
# cat("\nElapsed Time: ", obj$Runtime, " secs\n")
# cat("Random variables size: ", obj$Nrep)
#
# if(obj$Method == "Custom"){
#
# cat("\nCustom Precision of method: ", obj$Precision)
# cat("\nSimulations to convergence: ", obj$Nsim)
# cat("\n\n")
#
# }
#
#
# }
#
# hist.gencor <- function(obj, main = "Histogram of generated correlation matrix", xlab = "Correlations", color = "gray", ...){
#
# removone <- function(x) return(x[-which(x == 1)])
#
# hist(removone(obj$Matrix), main = main, xlab = xlab, col = color, ...)
#
# }
# #NOTAS PARA PACOTE
#
# #Checar consistência entre número de faixas e numero de proporções
#
# gencor <- function(d = 10,
# method = c("random", "low", "medium", "high", "custom"),
# custom_prop = NULL,
# nsim = 1000,
# lim_low = .3,
# lim_medium = .6,
# custom_lim = NULL,
# signal = c("random", "positive"),
# custom_precision = .03,
# custom_nrep = 1000,
# sort_intensity = F,
# random_liminf = 0.01){
#
#
# #Generate standard deviation values based on expected limits ----
# gensd <- function(lim = c(li, ls), n = 1){
#
# return(runif(n, sqrt((1/lim[2])-1), sqrt((1/(lim[1]))-1)))
#
# }
#
# #Generate correlation matrix based on defined limits
# gen_cor <- function(x, sdev, custom_signal = "random"){
#
# #Create the matrix by repeating x as columns
# matx <- matrix(rep(x, length(sdev)), nc = length(sdev), byrow = F)
#
# #Sets the signals of correlation matrix
#
# if(custom_signal == 'random'){
#
# sig <- diag(sample(c(-1, 1), length(sdev), replace = T))
#
# }
#
# if(custom_signal == 'positive'){
#
# sig <- diag(length(sdev))
# }
#
# matx <- matx %*% sig
#
# #Generate normal rv based on standard values that attends the limits and sums to initial normal rv
# y <- matx + mapply(function(x,y){rnorm(x, y, n = nsim)}, x = 0, y = sdev)
#
# return(list(matcor = cor(y),
# cordata = y))
#
# }
#
# #Computes the correlation proportion in the intensity bands
# prop_cor <- function(x, custom_lim = NULL){
#
# if(is.null(custom_lim)){
#
# custom_lim <- c(lim_low, lim_medium)
#
# }
#
# cuts <- c(0, custom_lim, 1)
#
# x <- abs(as.vector(x))
# x <- x[-which(x == 1)]
#
# tab <- table(cut(x, cuts))
#
# tab/sum(tab)
#
# table(cut(x, cuts))/sum(table(cut(x, cuts)))
#
# }
#
#
# #Initial runtime
# t1 <- Sys.time()
#
# #Check the compatibility of arguments defined by the user
# method <- match.arg(method)
# signal <- match.arg(signal)
#
# #Checks if the condition sqrt(lim_low) < lim_medium
# if((lim_medium) <= sqrt(lim_low)){
#
# stop("lim_medium must be greater than sqrt(lim_low)")
# }
#
# #If user informs custom limits or proportions, make method = "custom"
# if(is.null(custom_lim) == F){
#
# method <- "custom"
#
# }
#
#
# #Divides the (0,1) in equal intervals, take the largest value then
# #apply the powers of 2 based on the number of custom proportions
# #to create custom limits
# if((is.null(custom_prop) == F) & is.null(custom_lim) == T){
#
# method <- "custom"
# pow <- 2^seq(from = 0, to = (length(custom_prop) - 2), by = 1)
# custom_lim <- sort((1 - 1/(length(custom_prop) - 1)) ^ pow, decreasing = F)
#
#
# }
#
# st_method <- ""
#
# #Generates the initial normal rv
# x <- rnorm(nsim, 0, 1)
#
# #Generate the sd vector based on the method choice
#
# #Low correlations matrix ----
# if(method == "low"){
#
# sdev <- gensd(c(random_liminf, lim_low), d)
#
# m <- gen_cor(x, sdev, custom_signal = signal)
# matcor <- m$matcor
# gendata <- m$cordata
#
# st_method = "Low"
#
# }
#
# #Medium correlations matrix ----
# if(method == "medium"){
#
# sdev <- gensd(c(lim_low, lim_medium), d)
#
# m <- gen_cor(x, sdev, custom_signal = signal)
# matcor <- m$matcor
# gendata <- m$cordata
#
# st_method = "Medium"
#
# }
#
# #High correlation matrix ----
# if(method == "high"){
#
# sdev <- gensd(c(lim_medium, .9999), d)
#
# m <- gen_cor(x, sdev, custom_signal = signal)
# matcor <- m$matcor
# gendata <- m$cordata
#
# st_method = "High"
#
# }
#
# #Random correlation matrix ----
# if(method == "random"){
#
# sdev <- gensd(c(random_liminf, .9999), d)
#
# m <- gen_cor(x, sdev, custom_signal = signal)
# matcor <- m$matcor
# gendata <- m$cordata
#
# st_method = "Random"
#
# }
#
# ##Custom correlation matrix ----
#
# nrep <- 0
#
# #Store the desired proportions
# custom_prop_or <- custom_prop
#
#
# if(method == 'custom'){
#
# st_method <- ifelse(st_method == "Random", "Random", "Custom")
#
# repeat{
#
# #If limits are not informed, will be used 0.3 and 0.6 as limits
# if(is.null(custom_lim)){
#
# custom_lim <- c(lim_low, lim_medium)
#
# }
#
# #If the custom proportions are not informed, they'll be defined as discrete uniform
# if(is.null(custom_prop)){
#
# custom_prop <- rep(1, length(custom_lim)+1) / (length(custom_lim)+1)
# custom_prop_or <- custom_prop
#
# }
#
# if(d < length(custom_prop)){
#
# message("The number of custom proportions must be less or equal the matrix dimesion")
#
# break
# }
#
#
# #Add 0 and 1 to limits vector
# custom_lim_df <- data.frame(custom_vec_li = c(random_liminf, custom_lim),
# custom_vec_ls = c(custom_lim, 0.9999))
#
# #Generate standard deviation by intensity band
# sdev_df <- apply(custom_lim_df, 1, gensd, n = d)
#
# #Define the elements number in each correlation group and round up
# prop <- ceiling(custom_prop * d)
#
# #Remove the columns if there's some band with null proportion
# if(length(which(prop == 0)) > 0) {
#
# sdev_df <- as.data.frame(sdev_df[,-which(prop == 0)])
# prop <- prop[-which(prop == 0)]
#
# }
#
# #Generate the standard deviation vector
# sdev <- NULL
# for(i in 1:ncol(sdev_df)){
#
# sdev <- c(sdev, sample(sdev_df[,i], size = prop[i], replace = F))
#
# }
#
#
#
# #Delete at random elements that exceeds the desired matrix size
# if(length(sdev)>d){
#
# sdev <- sdev[-sample(1:length(sdev), size = length(sdev) - d)]
# }
#
#
# #Sorts the standard deviation order
# if(sort_intensity == F){
#
# sdev <- sdev[sample(1:length(sdev))]
# }
#
# if(sort_intensity == T){
#
# sdev <- sort(sdev)
# }
#
#
# #Generate the correlation matrix
# m <- gen_cor(x, sdev, custom_signal = signal)
# matcor <- m$matcor
# gendata <- m$cordata
#
#
#
# #Computes the difference between the desired and computed proportions
# dif <- prop_cor(matcor, custom_lim) - custom_prop_or
#
#
# #Check the conditions and if they are satisfied, returns the matrix
# if(max(abs(dif)) < custom_precision){
#
# break
#
# }
#
#
# #Optimization method
# if(nrep < custom_nrep){
#
# if((max(abs(dif)) > custom_precision) & runif(1) > nrep/custom_nrep){
#
# custom_prop[which.max(dif)] <- custom_prop[which.max(dif)] - custom_prop[which.max(dif)] * .1
# custom_prop[which.min(dif)] <- custom_prop[which.min(dif)] + custom_prop[which.max(dif)] * .1
#
# #Corrects roundup issues
# if(sum(custom_prop)!=1){
#
# custom_prop[which.min(dif)] <- custom_prop[which.min(dif)] + (1 - sum(custom_prop))
#
# }
#
# nrep <- nrep + 1
#
# }
#
# else{
#
# nrep <- nrep + 1
#
# }
# }
#
#
#
# #Stops if n = nsim
# if(nrep == custom_nrep){
#
# m <- gen_cor(x, sdev, custom_signal = signal)
# matcor <- m$matcor
# gendata <- m$cordata
#
#
# break
#
# }
# }
# }
#
#
# t2 <- Sys.time()
# runtime <- as.numeric(difftime(t2, t1, units = "secs"))
#
# genmat <- list(Matrix = matcor,
# Method = st_method,
# Proportions = prop_cor(matcor, custom_lim),
# Runtime = runtime,
# Nsim = nrep,
# Precision= custom_precision,
# Dimension = d,
# SDev = sdev,
# Custom_prop = custom_prop,
# Custom_lim = custom_lim,
# Signal = signal,
# Nrep = custom_nrep,
# Sort_intensity = sort_intensity)#,
# #`Generated data` = gendata)
#
#
# class(genmat) <- "gencor"
#
#
# return(genmat)
#
# }
#
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.