# functions ----------------------------------------------------------
# simulate data function ---------------------------------------------
#' Simulate Data for Components Analysis
#'
#' Simulates a two-level data set: level-2 (person) variables B, S, P and E
#' are drawn from a multivariate normal distribution with the given
#' correlations; for each person, level-1 (state) occasions of behavior and
#' situation are then simulated around that person's level-2 scores and
#' merged back onto the level-2 data by a random person id.
#'
#' @param n_pop The size of the population to simulate
#' @param b_s The correlation between behavior and the situational variable on level-2, which determines the correlation of their counterparts on level-1.
#' @param b_e The correlation between the stable environment e and the behavior b.
#' @param s_p The correlation between the situational variable and the average behavior.
#' @param b_p The correlation between the average behavior and personality.
#' @param s_e The correlation between the situation and the stable environment
#' @param p_e The correlation between the personality p and the environment e.
#' @param cor_bs_l1 The average correlation of behavior and situations on level-1, within person.
#' @param l1_var The variance of the level-1 variables (the larger, the less accurate will the mean reflect the true score at fewer measurement occasions)
#' @param l1_bs_sample The number of level-1 measurement occasions that are sampled.
#'
#' @return Returns a data frame with the following variables
#' \describe{
#' \item{B}{The level-2 behavior score of the person.}
#' \item{S}{The level-2 situation score of the person.}
#' \item{P}{The personality score of the person.}
#' \item{E}{The stable-environment score of the person.}
#' \item{id}{A random character id identifying the person.}
#' \item{s_state}{The level-1 situation state (one row per occasion).}
#' \item{b_state}{The level-1 behavior state (one row per occasion).}
#' }
#' @export
#'
#' @examples
#' data <- sim_data()
sim_data <- function(n_pop = 1000,
                     b_s = 0.26,
                     b_e = 0.19,
                     s_p = 0.14,
                     b_p = 0.27,
                     s_e = 0.25,
                     p_e = 0.23,
                     cor_bs_l1 = .2,
                     l1_var = 2,
                     l1_bs_sample = 50) {
  ## setup: level-2 correlation matrix for (B, S, P, E);
  ## the argument defaults correspond to empirically observed values
  Sigma <- matrix(c(
    1,   b_s, b_p, b_e,
    b_s, 1,   s_p, s_e,
    b_p, s_p, 1,   p_e,
    b_e, s_e, p_e, 1
  ), nrow = 4, ncol = 4)
  n_var <- ncol(Sigma)
  means_all <- 0 # all level-2 variables are centered at 0
  mu <- rep(means_all, n_var)
  rawvars <- MASS::mvrnorm(n = n_pop, mu = mu, Sigma = Sigma)
  colnames(rawvars) <- c("B", "S", "P", "E")
  raw_data <- data.frame(rawvars)
  B <- raw_data[, "B"]
  S <- raw_data[, "S"]
  # random 20-character ids for the simulated persons
  id <- stringi::stri_rand_strings(length(B), 20)
  raw_data$id <- id
  # simulate the level-1 (within-person) states around one person's level-2
  # scores; returns a data frame with l1_bs_sample rows (s_state, b_state)
  simulate_l1_bs <- function(mu_S_person, mu_B_person, cor_bs_l1 = .2,
                             l1_bs_population = 1000,
                             l1_bs_sample = 50,
                             sd_within_slope = 0.05) {
    # NOTE(review): bsr enters the matrix as a covariance (off-diagonal)
    # next to the variance l1_var, so the realized within-person correlation
    # is cor_bs_l1 / l1_var, not cor_bs_l1 itself -- confirm this is intended.
    # (An earlier variant drew bsr ~ N(cor_bs_l1, sd_within_slope) to model
    # individual differences in slopes; kept deterministic here.)
    bsr <- cor_bs_l1
    l1_bs_matrix <- matrix(c(l1_var, bsr,
                             bsr, l1_var), 2, 2, byrow = TRUE)
    l1_bs_raw <- MASS::mvrnorm(n = l1_bs_population,
                               mu = c(mu_S_person, mu_B_person),
                               Sigma = l1_bs_matrix)
    # sample the observed measurement occasions from the state population
    esm_occasions <- sample(seq_len(l1_bs_population), l1_bs_sample,
                            replace = FALSE)
    sample_occasions <- data.frame(l1_bs_raw[esm_occasions, ])
    colnames(sample_occasions) <- c("s_state", "b_state")
    sample_occasions
  }
  # simulate the level-1 states for every person
  level1_states <- purrr::map2(.x = S,
                               .y = B,
                               .f = function(x, y) {
                                 simulate_l1_bs(x, y,
                                                cor_bs_l1 = cor_bs_l1,
                                                l1_bs_sample = l1_bs_sample)
                               })
  # helper function to attach the person id to a level-1 data frame
  cidd <- function(d, i) {
    d$id <- rep(i, nrow(d))
    d
  }
  ## add id-variables on level-1
  level_1_id_states <- purrr::map2(.x = level1_states,
                                   .y = id,
                                   .f = cidd)
  # bind data frames from list into one data frame
  l1_states_data <- do.call("rbind", level_1_id_states)
  # merge data on level-1 and level-2
  simulated_data <- dplyr::inner_join(raw_data, l1_states_data, by = "id")
  simulated_data
}
# components analysis -----------------------------------------------
#' Run Components Analysis
#'
#' Splits the data by person, cumulatively aggregates the level-1 states up
#' to each measurement occasion, and runs a commonality (components) analysis
#' and a dominance analysis for every number of aggregated states.
#'
#' @param data The data frame to analyze
#' @param outcome_state The outcome or dependent variable that is predicted (the average level-1 variable)
#' @param pred_trait The predictor variables on level-2
#' @param pred_state The predictor variable on level-1
#' @param single Logical, default is FALSE. If TRUE, the single state at the
#'   current occasion is used as the outcome and the single predictor state is
#'   added to the predictors, instead of predicting the aggregated outcome.
#' @param id The id/group variable of the participants
#'
#' @return Returns a list with three elements:
#' \itemize{
#' \item{1}{The results of the components analysis (negative values set to NA)}
#' \item{2}{The explained variance (R-squared) per number of states}
#' \item{3}{The results of the dominance analyses (general dominance)}
#' }
#' @export
#'
#' @examples
#' simulated_data <- sim_data()
#' comp_list <- cons_comp_analysis(simulated_data)
cons_comp_analysis <- function(data, outcome_state = "b_state", pred_trait = c("P", "E"),
                               pred_state = c("s_state"),
                               single = FALSE,
                               id = "id") {
  # split by person and determine the maximum number of measurement occasions
  out <- split(data, f = data[, id])
  length_out <- purrr::map(out, nrow)
  max_o <- max(unlist(length_out))
  # pad a person's data frame with NA rows so all have max_o rows
  make_nrow <- function(x, max_o) {
    if (nrow(x) == max_o) {
      return(x)
    } else {
      x[(nrow(x) + 1):max_o, ] <- NA
      return(x)
    }
  }
  # make all data frames in the list have equal length
  level_1_id_states <- purrr::map(out, ~ make_nrow(., max_o))
  # aggregate one person's states (up to the current occasion) into means
  aggregate_within <- function(odf) {
    data.frame(id = odf[1, id],
               out_trait = mean(odf[, outcome_state], na.rm = TRUE),
               pre_trait = mean(odf[, pred_state], na.rm = TRUE))
  }
  # run one full analysis using the first i occasions per person
  sample_all_o <- function(i) { # i is the occasion of interest here
    odf <- purrr::map(level_1_id_states, ~ .[1:i, ])
    ## single behavior and single situation as predictors
    single_occasion <- purrr::map(level_1_id_states, ~ .[i, ])
    # aggregate states per person
    aggregate_states <- purrr::map(odf, aggregate_within)
    # combine aggregated states in data frame
    aggregate_data <- do.call("rbind", aggregate_states)
    single_state_data <- do.call("rbind", single_occasion)
    # set names of id before merging
    data.table::setnames(aggregate_data, old = "id", new = id)
    # merge aggregate state and trait data
    suppressWarnings(full_data <- dplyr::inner_join(data, aggregate_data, by = id))
    # select single states
    single_state_data_subset <- single_state_data[c(outcome_state, pred_state, id)]
    data.table::setnames(single_state_data_subset, old = c(outcome_state, pred_state), new = paste0(c(outcome_state, pred_state), "_single"))
    # merge single states with data frame
    full_data <- dplyr::inner_join(full_data, single_state_data_subset, by = id)
    # take subset of variables (rows and columns) that are needed for the analyses
    level2_data <- dplyr::distinct(full_data[c(pred_trait, "out_trait", "pre_trait", paste0(c(outcome_state, pred_state), "_single"), id)])
    data.table::setnames(x = level2_data, old = c("out_trait", "pre_trait"), new = c(outcome_state, pred_state))
    # all-possible-subsets regression; aps/commonality/dominance are
    # presumably provided by the yhat package -- confirm package imports
    if (single) {
      # components analysis with the single behavior state as outcome
      apsOut <- aps(level2_data, paste0(outcome_state, "_single"), list(pred_trait[1], pred_state, pred_trait[2], paste0(pred_state, "_single")))
    } else {
      # components analysis with aggregated behavior as outcome
      apsOut <- aps(level2_data, outcome_state, list(pred_trait[1], pred_state, pred_trait[2]))
    }
    # extract parameters
    commonality <- round(commonality(apsOut)[, 2], 3)
    rsquared <- sum(apsOut$APSMatrix[, 2])
    dominance <- dominance(apsOut)$GD # return general dominance
    if (rsquared > 1) {
      rsquared <- 1
      warning("R-squared in this model was larger than 1 and therefore set to 1")
    }
    # return elements for each situation/measurement
    return(
      list(commonality,
           rsquared,
           dominance)
    )
  }
  # run the above function across all numbers of aggregated states
  params <- purrr::map(seq_len(max_o), sample_all_o)
  # commonality components: one row per number of aggregated states
  # (do not name this `c`; that shadows base::c)
  components <- lapply(params, "[[", 1)
  params_df <- do.call("rbind", components)
  # negative commonality coefficients are treated as missing
  params_df[params_df < 0] <- NA
  # r-squared per number of states
  rsq <- unlist(lapply(params, "[[", 2))
  # dominance results per number of states
  dmc <- lapply(params, "[[", 3)
  dmc_df <- do.call("rbind", dmc)
  return(
    list(params_df,
         rsq,
         dmc_df)
  )
}
# Plot Stacked Areas -------------------------------------------------
#' Plot Stacked Areas
#'
#' @param data The data frame that needs to be plotted. Should only contain the variables that should appear in the plot.
#' @param poly.border.col The color of the border of the stacked areas.
#' @param poly.fill.col The fill of the stacked areas. The areas are colored from top to bottom (i.e. the highest one first).
#' @param letters The letters that should be placed in the areas. If NULL, the names of the data is used.
#' @param letter.col The colors of the letters that are plotted.
#' @param letters.right Logical, default is FALSE. If TRUE, the letters are set to the right of the plot.
#' @param ... Further arguments passed on to the plot function plot(...)
#'
#' @return Plots areas that are scaled to 100%.
#' @export
#'
#' @examples
#' simulated_data <- sim_data()
#' comp_list <- cons_comp_analysis(simulated_data)
#' plot_data <- comp_list[[1]]
#' plot_polygons(plot_data, letters.right = TRUE)
plot_polygons <- function(data,
                          poly.border.col = "grey10",
                          poly.fill.col = c("white", "white",
                                            "white", "white", "#F8CBAE",
                                            "#FDF2CA", "#9DC3E6"),
                          letters = c("P,S,E", "S,E", "P,E", "P,S", "E", "S", "P"),
                          letter.col = "black",
                          letters.right = FALSE,
                          ...) {
  # warn about non-numeric columns, which cannot be stacked
  is.num <- unlist(purrr::map(data, is.numeric))
  nonNumeric <- names(data)[!is.num]
  if (any(!is.num)) {
    warning(paste("The following items/columns are non-numeric. \n ",
                  nonNumeric, collapse = " "), call. = FALSE)
  }
  xunits <- nrow(data) # number of x units; we plot from 0 to n
  rownames(data) <- seq_len(xunits)
  # rescale each row to percentages so the areas stack to 100
  total <- rowSums(data, na.rm = TRUE)
  perc.dat <- as.data.frame(apply(data, 2, function(x) x / total * 100))
  t.perc.dat <- t(perc.dat)
  # vector over which we compute the boundaries for the polygons
  len <- 2:ncol(data)
  # cumulative column sums give the upper borders of the stacked polygons
  res <- rev(lapply(len, function(x) colSums(t.perc.dat[1:x, ], na.rm = TRUE)))
  # add the last polygon (the first variable alone) to the list
  res[[length(res) + 1]] <- t.perc.dat[1, ]
  # lower border of the last polygon, i.e. 0, repeated per row of data
  lower <- rep(0, xunits)
  # draw one polygon between the baseline and border number x
  plot.poly <- function(x) {
    xcoords <- c(xunits:1, 1:xunits)
    ycoords <- c(lower, res[[x]])
    polygon(xcoords, ycoords, col = poly.fill.col[x], border = poly.border.col)
  }
  ## plot an empty area
  plot(NA, ylim = c(1, 110), xlim = c(0, xunits), type = "n",
       yaxt = "n", xaxt = "n", bty = "n", ylab = "", xlab = "", ...)
  axis(1)
  # plot the polygons
  invisible(lapply(seq_along(res), function(x) plot.poly(x)))
  if (ncol(data) != length(poly.fill.col)) {
    warning("Only ", length(poly.fill.col), " colors were specified, but ", ncol(data), " areas were plotted. Colors were recycled. Please check.")
  }
  # add letters, default is the (reversed) names of data
  if (is.null(letters)) { letters <- rev(colnames(data)) }
  # append the zero lower border (named by x position) so each area has an
  # upper and a lower boundary between which its letter can be placed
  names(lower) <- 0:(length(lower) - 1)
  res[[length(res) + 1]] <- lower
  # for each area, compute where to place its letter and draw it
  invisible(
    lapply(seq_len(length(res) - 1), function(x) {
      y <- x + 1
      list_low <- list()
      list_up <- list()
      # restrict candidate positions to the inner 8%-90% of the x range
      list_low[[x]] <- res[[x]][ceiling(length(res[[x]]) * .08):floor(length(res[[x]]) * .90)]
      list_up[[x]] <- res[[y]][ceiling(length(res[[y]]) * .08):floor(length(res[[y]]) * .90)]
      ## letters can alternatively be plotted to the right of the plot
      if (nrow(data) >= 12 && !letters.right) {
        # place the letter where the area is widest (maximum difference
        # between borders); if there is more than one maximum, take the first
        posx <- names(which(list_low[[x]] - list_up[[x]] == max(list_low[[x]] - list_up[[x]])))[1]
        # vertical position: midpoint between the (restricted) borders
        posy <- mean(c(list_low[[x]][posx], list_up[[x]][posx]))
        text(as.numeric(posx), posy, letters[x], col = letter.col)
      } else {
        posx <- nrow(data) - 1
        # midpoint of the full borders ('res'), to place the letter at the
        # right-hand side of the plot
        posy <- mean(c(res[[x]][posx], res[[y]][posx]))
        text(posx - posx * 0.03, posy, letters[x], col = letter.col)
      }
    }))
}
# plot components with explained variance ----------------------------
#' Plot results of components analysis
#'
#' @param components The components data frame (first element of the result
#'   of \code{cons_comp_analysis}).
#' @param rsquared The explained variance per number of states (second
#'   element of the result of \code{cons_comp_analysis}).
#' @param ... Further arguments passed on to plot_polygons
#'
#' @return
#' Returns a plot with the components; the explained variance is overlaid as
#' a dotted line rescaled into the 100-110 band of the y-axis, with its first
#' and last values printed as text.
#' @export
#'
#' @examples
#' simulated_data <- sim_data()
#' comp_list <- cons_comp_analysis(simulated_data)
#' plot_com(components = comp_list[[1]], rsquared = comp_list[[2]])
plot_com <- function(components, rsquared, ...) {
  params <- components
  rsq <- rsquared
  plot_polygons(params, ...)
  # overlay the explained variance, rescaled into the 100-110 band
  rsq_plot <- rsq * 10 + 100
  lines(rsq_plot, col = "black", lwd = 2, lty = 3)
  # annotate the first and last r-squared values
  # (avoid `min`/`max` as variable names; they shadow base functions)
  rsq_first <- round(rsq[1], 2)
  rsq_last <- round(rsq[length(rsq_plot)], 2)
  text(x = 1, y = 10 + 100, labels = rsq_first)
  text(x = length(rsq_plot), y = 10 + 100, labels = rsq_last)
  axis(2, at = c(0, 20, 40, 60, 80, 100), labels = c(0, 20, 40, 60, 80, 100))
  # right-hand axis for the explained variance (0-100 mapped onto 100-110)
  axis(4, at = c(100, 105, 110), labels = c(0, 50, 100))
}
# plot dominance -----------------------------------------------------
#' Plot Results from Dominance Analyses
#'
#' @param dominance The dominance results from cons_comp_analysis (the third
#'   element of its result list).
#' @param line_names The names of the lines in the legend. Default is NULL,
#'   in which case the column names of \code{dominance} are used.
#' @param ... Further parameters passed on to plot()
#'
#' @return Returns a plot with the results of the dominance analyses
#' @export
#'
#' @examples
#' data <- sim_data()
#' estimates <- cons_comp_analysis(data, single = FALSE)
#' plot_dominance(estimates[[3]])
plot_dominance <- function(dominance, line_names = NULL, ...) {
  x_max <- nrow(dominance)
  # round the y-axis maximum to one decimal for a clean limit
  y_max <- max(dominance)
  y_max <- round(y_max * 10, 0) / 10
  plot(NA, ylim = c(0, y_max), xlim = c(0, x_max), ylab = "Dominance", xlab = "Number of States", bty = "n", ...)
  # fall back to the column names when no line names are supplied
  # (previously line_names was always overwritten and thus ignored)
  if (is.null(line_names)) {
    line_names <- colnames(dominance)
  }
  line_types <- seq_len(ncol(dominance)) + 1
  # draw one line per dominance column, each with its own line type
  invisible(purrr::map2(.x = data.frame(dominance), .y = line_types,
                        .f = function(x, y) {
                          lines(x, lty = y)
                        }))
  legend(x = 0, y = y_max, legend = line_names, lty = line_types, bty = "n")
}
# replicate sim -----------------------------------------------------
#' Repeat Simulation
#'
#' Runs \code{sim_data} followed by \code{cons_comp_analysis} REPS times and
#' averages the component estimates and the explained variances across
#' replications. The dominance results (the third element of each analysis)
#' are not aggregated.
#'
#' @param REPS The number of simulations to be made
#' @param n_pop The size of the population to simulate
#' @param b_s The correlation between behavior and the situational variable on level-2, which determines the correlation of their counterparts on level-1.
#' @param b_e The correlation between the stable environment e and the behavior b.
#' @param s_p The correlation between the situational variable and the average behavior.
#' @param b_p The correlation between the average behavior and personality.
#' @param s_e The correlation between the situation and the stable environment
#' @param p_e The correlation between the personality p and the environment e.
#' @param cor_bs_l1 The average correlation of behavior and situations on level-1, within person.
#' @param l1_var The variance of the level-1 variables (the larger, the less accurate will the mean reflect the true score at fewer measurement occasions)
#' @param l1_bs_sample The number of level-1 measurement occasions that are sampled.
#'
#' @return Returns a list with two objects: The first is a matrix with the
#'   component estimates averaged across replications, the second is a vector
#'   with the averaged explained variances.
#' @export
#'
#' @examples
#' replicate_sim(10)
replicate_sim <- function(REPS = 10,
                          n_pop = 1000,
                          b_s = 0.26,
                          b_e = 0.19,
                          s_p = 0.14,
                          b_p = 0.27,
                          s_e = 0.25,
                          p_e = 0.23,
                          cor_bs_l1 = .2,
                          l1_var = 2,
                          l1_bs_sample = 50) {
  # one replication: simulate a population and analyze it
  repl_f <- function(...) {
    data <- sim_data(...)
    cons_comp_analysis(data)
  }
  # simplify = FALSE keeps one result list per replication.
  # BUG FIX: previously the results were flattened by replicate() and indexed
  # in steps of two (seq(1, REPS*2, by = 2)), but cons_comp_analysis returns
  # THREE elements per replication, so the extracted "parameters" and
  # "r-squared" entries were misaligned across replications.
  parameters_list <- replicate(REPS,
                               repl_f(n_pop = n_pop, b_s = b_s, b_e = b_e,
                                      s_p = s_p, b_p = b_p, s_e = s_e,
                                      p_e = p_e, cor_bs_l1 = cor_bs_l1,
                                      l1_var = l1_var,
                                      l1_bs_sample = l1_bs_sample),
                               simplify = FALSE)
  # element 1: component estimates; element 2: explained variances
  para_list <- purrr::map(parameters_list, ~ .[[1]])
  rsq_list <- purrr::map(parameters_list, ~ .[[2]])
  # average the component matrices element-wise across replications
  para_mean <- plyr::aaply(plyr::laply(para_list, as.matrix), c(2, 3), mean,
                           na.rm = TRUE)
  rsq_mean <- colMeans(do.call("rbind", rsq_list))
  # (do not name the result `return`; that shadows the base function)
  list(para_mean,
       rsq_mean)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.