# ---------------------------
# generate_data.R
# Generate seed_data and send it
# back.
# ---------------------------
# Necessary functions
# ---------------------------
#' get_seed_data
#'
#' @export
get_seed_data = function(seed, n, dims, dists, effs){
set.seed(seed)
# Unprotected cluster
# TODO: why 2*n?
unprot = mvrnorm(2*n, rep(5, dims), diag(dims))
cent_unprot = colMeans(unprot)
if (!is.null(globals_list$par_effs) && globals_list$par_effs){
effs_data_list = foreach(eff=effs, .inorder=FALSE, .export=c("globals_list"), .packages=c("ksmthesis")) %dopar% {
return(generate_effs_data(eff, dists, unprot, cent_unprot))
}
} else {
effs_data_list = lapply(effs, generate_effs_data, dists, unprot, cent_unprot)
}
if (is.null(globals_list$do_permute) || !globals_list$do_permute){
effs_data = do.call(rbind, effs_data_list)
return(effs_data)
} else {
effs_data = lapply(effs_data_list, function(x){
return(x$dists_data)
})
effs_data = do.call(rbind, effs_data)
# NOTE: jank
permuted_effs_inf_data = lapply(effs_data_list, function(x){
return(x$permuted_dists_data$inf)
})
permuted_effs_inf_data = do.call(rbind, permuted_effs_inf_data)
permuted_effs_prot_data = lapply(effs_data_list, function(x){
return(x$permuted_dists_data$prot)
})
permuted_effs_prot_data = do.call(rbind, permuted_effs_prot_data)
}
gc()
# if globals_list$do_permute:
# [[1]] is the data
# [[2]] is a list of length 2
# Both are matrices of effs, dists, permuted labels, perm_iters
return(list(effs_data=effs_data, permuted_effs_data=list(inf=permuted_effs_inf_data,
prot=permuted_effs_prot_data)))
}
# NOTE: eff, NOT effs
#' generate_effs_data
#'
#' @export
generate_effs_data = function(eff, dists, unprot, cent_unprot){
if (length(eff) > 1){
warning("Please pass a single efficacy value. Aborting...", .call=TRUE)
quit()
}
num_prot = floor(eff * globals_list$n)
num_unprot = globals_list$n - num_prot
dists_data_list = lapply(dists, generate_dists_data, eff, unprot, cent_unprot, num_prot, num_unprot)
if (is.null(globals_list$do_permute) || !globals_list$do_permute){
dists_data = do.call(rbind, dists_data_list)
return(dists_data)
} else {
dists_data = lapply(dists_data_list, function(x){ return(x$data) })
dists_data = do.call(rbind, dists_data)
# NOTE: jank
permuted_dists_inf_data = lapply(dists_data_list, function(x){ return(x$permuted_data$inf) })
permuted_dists_inf_data = do.call(rbind, permuted_dists_inf_data)
permuted_dists_prot_data = lapply(dists_data_list, function(x){ return(x$permuted_data$prot) })
permuted_dists_prot_data = do.call(rbind, permuted_dists_prot_data)
}
return(list(dists_data=dists_data, permuted_dists_data=list(inf=permuted_dists_inf_data,
prot=permuted_dists_prot_data)))
}
#' generate_dists_data
#'
#' @export
generate_dists_data = function(dist, eff, unprot, cent_unprot, num_prot, num_unprot){
cent_prot = cent_unprot + (dist / sqrt(2))
prot = mvrnorm(globals_list$n, cent_prot, diag(length(cent_prot)))
# ---------------------------
# B. Generate infected and uninfected
inf_unprot = sample(1:globals_list$n, globals_list$n, replace=FALSE)
inf = as.matrix(unprot[inf_unprot,])
uninf = rbind(prot[sample(1:globals_list$n, num_prot, replace=FALSE),],
unprot[sample(1:globals_list$n, num_unprot, replace=FALSE),])
# Jitter points in uninf that are the same as those in inf (sampled from unprot)
# This serves no statistical purpose, but makes visualization less fraught,
# i.e. no thinking there's a problem (cough mislabeling as RNs)
# when points of different shapes are overlapping.
tmp = rbind(inf, uninf)
dupe_idx = (1:nrow(tmp))[duplicated(tmp)] - nrow(inf)
uninf[dupe_idx,] = apply(uninf[dupe_idx,], 2, jitter, factor=500)
tmp = rbind(inf, uninf)
num_rows = nrow(inf) + nrow(uninf)
prot_groups = c(rep(globals_list$UnprotNum, nrow(inf)),
rep(globals_list$ProtNum, num_prot),
rep(globals_list$UnprotNum, num_unprot))
inf_groups = c(rep(globals_list$InfNum, nrow(inf)),
rep(globals_list$UninfNum, nrow(uninf)))
data = as.matrix(data.frame(eff=rep(eff, num_rows),
dist=rep(dist, num_rows),
rbind(inf, uninf),
inf_groups, prot_groups))
data = apply(data, 2, as.numeric)
if (is.null(globals_list$do_permute) || !globals_list$do_permute){
return(data)
} else {
permuted_data = wrap_permute_data(data)
}
return(list(data=data, permuted_data=permuted_data))
}
# ---------------------------
# Requires:
# globals_list$permute_iters
# library(prodlim)
#' wrap_permute_data
#'
#' @export
wrap_permute_data = function(dist_data){
if (is.null(globals_list$permute_iters)){
warning("globals_list$permute_iters not set - defaulting to 100. Buckle up!", call.=TRUE)
globals_list$permute_iters <<- 100
}
permuted_dist_data = lapply(1:globals_list$permute_iters, permute_data, dist_data)
# NOTE: jank
permuted_dist_inf_data = lapply(permuted_dist_data, function(x){ return(x$inf) })
permuted_dist_prot_data = lapply(permuted_dist_data, function(x){ return(x$prot) })
permuted_dist_inf_data = do.call(rbind, permuted_dist_inf_data)
permuted_dist_prot_data = do.call(rbind, permuted_dist_prot_data)
gc()
return(list(inf=permuted_dist_inf_data, prot=permuted_dist_prot_data))
}
# ---------------------------
# Requires:
# fcan
# Reassign ratio number of inf and uninf to the opposite group number.
# Ratio of 0.5 is "perfectly permuted", i.e. as opposite as possible without
# being an inversion.
#' permute_data
#'
#' @export
permute_data = function(iter, dist_data, ratio=0.5){
permuted_inf_groups = dist_data[,"inf_groups"]
permuted_prot_groups = dist_data[,"prot_groups"]
status_types = c("inf", "prot")
groups_list = list(inf=dist_data[,"inf_groups"], prot=dist_data[,"prot_groups"])
results = vector("list", length(groups_list))
for (i in 1:length(groups_list)){
group_data = groups_list[[i]]
tmp = group_data
groups_levels = unique(group_data)
# If not, we have nothing to permute
if (length(groups_levels) > 1){
# NOTE: jank. Only works for two groups; "switching" labels doesn't
# mean anything for more than two groups.
for (group in groups_levels){
group_indices = which(group_data == group)
permute_indices = sample(group_indices, ratio*length(group_indices), replace=FALSE)
# Switch the groups
tmp[permute_indices] = groups_levels[which(groups_levels != group)]
}
}
permuted_data = dist_data
if (class(permuted_data)[1] == "data.frame"){
toss = c("V1", "V2", "V3")
} else if (class(permuted_data)[1] == "matrix"){
toss = c("X1", "X2", "X3")
}
cname = paste(status_types[i], "_groups", sep='')
permuted_data = permuted_data[,!colnames(permuted_data) %in% toss]
permuted_data[,cname] = tmp
permuted_data = cbind(permuted_data, perm_iter=iter)
results[[i]] = permuted_data
}
names(results) = status_types
gc()
return(results)
}
# ---------------------------
# WRAPPER FOR IT ALL
# ---------------------------
# User doesn't care how you get it, just that they get it
#' generate_seed_data
#'
#' @export
generate_seed_data = function(seeds, dims, dists, effs, results_dir, dogen){
if (dogen){
g_s_d_expr = expression(
get_seed_data(seed, globals_list$n, dims, dists, effs)
)
cat("Generating data...\n")
if (globals_list$par_seeds){
seed_data = foreach (seed=seeds, .inorder=FALSE, .export=c("globals_list"), .packages=c("ksmthesis")) %dopar% {
return(eval(g_s_d_expr))
}
} else {
seed_data = foreach (seed=seeds) %do% {
return(eval(g_s_d_expr))
}
}
names(seed_data) = seeds
save(seed_data, file=paste(results_dir, "seed_data", sep=''))
} else {
load(paste(results_dir, "seed_data", sep=''))
}
return(seed_data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.