Nothing
readData <- function( Y_input, X_input, RandomSeed = 99){
set.seed(RandomSeed)
if (is.null(Y_input)) stop("Y_input is NULL")
NA_Y_mat = is.na( Y_input )
Y_input <- data.frame(Y_input)
if (is.null(X_input)) stop("X_input is NULL")
NA_X_mat = is.na( X_input )
X_input <- data.frame(X_input)
n_sample = dim(Y_input)[[1]] ; p_Y = dim(Y_input)[[2]]
Y_mat_std = array(0,c(n_sample,p_Y))
mean_Y_input = sd_Y_input = rep(0,p_Y)
for (l in 1:p_Y){
mean_Y_input[l] = mean(Y_input[,l], na.rm=TRUE)
sd_Y_input[l] = sd(Y_input[,l], na.rm=TRUE)
Y_mat_std[,l] = ( Y_input[,l] - mean_Y_input[l] ) / sd_Y_input[l]
}
p_X = dim(X_input)[[2]] ; D_l_vec = rep(0,p_X)
X_mat_std = array(0,c(n_sample,p_X))
levels_X_input = list()
for (l in 1:p_X){
# unique_l = sort(unique(X_input[,l]))
levels_X_input[[l]] = unique_l = levels(X_input[,l])
D_l_vec[l] = length(unique_l)
for (i in 1:n_sample){
if (is.na(X_input[i,l])){
X_mat_std[i,l] = X_input[i,l]
} else {
X_mat_std[i,l] = which( X_input[i,l] == unique_l ) - 1
}
}
} # for (l)
var_names <- list(y=names(Y_input), x=names(X_input))
names(levels_X_input) <- names(X_input)
orig_data <- cbind(Y_input, X_input) #### new
HCMM_input = list(n_sample=n_sample, p_Y=p_Y, Y_mat_std=Y_mat_std, mean_Y_input=mean_Y_input, sd_Y_input=sd_Y_input, NA_Y_mat = NA_Y_mat, p_X=p_X, D_l_vec=D_l_vec, X_mat_std=X_mat_std, levels_X_input=levels_X_input, NA_X_mat = NA_X_mat, var_names = var_names, orig_data = orig_data)
class(HCMM_input) <- "readData_passed"
return(HCMM_input)
} # readData <- function
createModel <- function(data_obj, max_R_S_K=c(30,50,20)){ # , r_i_vec, s_i_vec, k_i_vec
if ( !is(data_obj, "readData_passed") ) stop("data_obj should be generated by 'readData' function") ;
model <- new(modelobject, max_R_S_K)
Y_mat = data_obj$Y_mat_std ; X_mat = data_obj$X_mat_std ;
Y_NA_mat = is.na(Y_mat) ; X_NA_mat = is.na(X_mat) ;
Y_mat[is.na(Y_mat)] = 0.0 ; X_mat[is.na(X_mat)] = 0 ;
# print(head(Y_in)) ; print(head(X_in)) ;
model$Y_mat = as.matrix(Y_mat) ; model$.Y_NA_mat = as.matrix(Y_NA_mat) ;
model$X_mat = as.matrix(X_mat) ; model$.X_NA_mat = as.matrix(X_NA_mat)
model$.D_l_vec = data_obj$D_l_vec
model$msg_level = 2
# 0: errors; 1: error and warnings; 2: errors, warnings and info
model$.Initialization()
return(model)
} # Initialize <- function
### multipleSyn
multipleSyn <- function(data_obj, model_obj, n_burnin, m, interval_btw_Syn, show_iter=TRUE){
int_print = min(200,floor(interval_btw_Syn/2)) ;
if ( !is(model_obj, "Rcpp_modelobject") ) stop("model_obj needs to be prepared by 'createModel' function") ;
n_sample = dim(model_obj$Y_mat)[[1]] ; p_y = dim(model_obj$Y_mat)[[2]] ; p_x = dim(model_obj$X_mat)[[2]]
r_i_cube = array(0,c(m,n_sample)) ; s_i_cube = array(0,c(m,n_sample)) ; k_i_cube = array(0,c(m,n_sample)) ;
Synt_data <- replicate(m,
list(Y_Synt=array(0, c(n_sample, p_y)),
X_Synt=array(0, c(n_sample, p_x))), simplify=FALSE)
total_iter = n_burnin + m * interval_btw_Syn
count_iter = 0
if (show_iter==TRUE){
start_time = current_time = Sys.time(); message(paste0("Current time, ",current_time)) ; prev_time = current_time # 1.3.4
}
message( paste0("Total iteration = ", (total_iter) ) )
message("Burn-in ..................................")
if ( n_burnin > int_print ){
n_repeat_burnin = floor( n_burnin / int_print ) ; resid_n = n_burnin - n_repeat_burnin * int_print ;
for (i_repeat in 1:n_repeat_burnin){
for (i_run in 1:int_print){
model_obj$.Iterate()
count_iter = count_iter + 1
}
current_time = Sys.time()
if (show_iter==TRUE){
int_time = round( as.numeric(difftime(current_time, prev_time, units = "mins")), 1 )
message(paste("Current time, ",current_time))
message(paste0("Iter=",(i_repeat*int_print),", ",int_time," min for previous ", int_print, " iterations"))
cat("table(r_i_vec)=\n") ; print(table(model_obj$r_i_vec+1))
cat("\ntable(s_i_vec)=\n") ; print(table(model_obj$s_i_vec+1))
cat("\ntable(k_i_vec)=\n") ; print(table(model_obj$k_i_vec+1)); cat("\n")
}
prev_time = current_time
} # for (i_repeat)
if (resid_n>0){
for (i_run in 1:resid_n){
model_obj$.Iterate()
count_iter = count_iter + 1
}
}
} else {
for (i_run in 1:n_burnin){
model_obj$.Iterate()
count_iter = count_iter + 1
}
} # if (n_burn_in)
message("Drawing completed datasets ............")
for ( i_imp in 1:m ){
## Run (interval_btw_Syn) iterations for parameter updates ##
for (i_run in 1:interval_btw_Syn){
model_obj$.Iterate()
count_iter = count_iter + 1
}
## Draw an imputed dataset for every (interval_btw_Syn) iterations ##
r_i_cube[i_imp,] = model_obj$r_i_vec+1 ; s_i_cube[i_imp,] = model_obj$s_i_vec+1 ; k_i_cube[i_imp,] = model_obj$k_i_vec+1
# Synthetic dataset #
model_obj$.Synthesis()
for (i_sample in 1:n_sample){
Synt_data[[i_imp]]$Y_Synt[i_sample,] = data_obj$mean_Y_input + data_obj$sd_Y_input * model_obj$Synt_Y_mat[i_sample,]
}
for(l in 1:p_x){
Synt_data[[i_imp]]$X_Synt[,l] <- data_obj$levels_X_input[[l]][model_obj$Synt_X_mat[,l] + 1]
}
Synt_data[[i_imp]]$Y_Synt <- data.frame(Synt_data[[i_imp]]$Y_Synt); names(Synt_data[[i_imp]]$Y_Synt) <- data_obj$var_names$y
Synt_data[[i_imp]]$X_Synt <- data.frame(Synt_data[[i_imp]]$X_Synt); names(Synt_data[[i_imp]]$X_Synt) <- data_obj$var_names$x
for(l in 1:p_x){
Synt_data[[i_imp]]$X_Synt[,l] <- factor(Synt_data[[i_imp]]$X_Synt[,l], levels=data_obj$levels_X_input[[l]])
}
Synt_data[[i_imp]] <- cbind(Synt_data[[i_imp]]$Y_Synt, Synt_data[[i_imp]]$X_Synt)
} # for ( i_imp )
Synthetic_result = list(synt_data = Synt_data, comp_mat = list(r_mat = r_i_cube, s_mat = s_i_cube, k_mat = k_i_cube), orig_data = data_obj$orig_data) # The last two objects are newly added
message("Finished...............................")
if (show_iter==TRUE){
current_time = Sys.time(); message(paste("Current time,",current_time))
message( paste0("Total time for the ",total_iter," iterations = ", round(current_time-start_time, 3) ) )
}
class(Synthetic_result) <- "synMicro_object"
return(Synthetic_result)
}
print.synMicro_object <- function(x, ...){
## print multipleSyn results
cat(paste0("Number of synthetic data sets: ", length(x$synt_data), "\n"))
cat(paste0("Data size: ", nrow(x$orig_data), " obs. of ", ncol(x$orig_data), " variables \n"))
cat("-------------------------------------------\n")
cat("First synthetic dataset:\n")
print(head(x$synt_data[[1]]))
cat("\nMixture component of the first synthetic dataset:\n")
cat(" $r_mat: ", head(x$comp_mat$r_mat[1,]), " ...\n")
cat(" $s_mat: ", head(x$comp_mat$s_mat[1,]), " ...\n")
cat(" $k_mat: ", head(x$comp_mat$k_mat[1,]), " ...\n")
}
summary.synMicro_object <- function(object, max_print = 4, ...){
## print multipleSyn results
is_Y <- sapply(object$orig_data, is.numeric)
is_X <- !is_Y
n_print <- ifelse(length(object$synt_data)>max_print, max_print, length(object$synt_data))
results <- list() ## final output
results$continuous <- list()
## continuous
for(i in which(is_Y)){
orig <- summary(object$orig_data[,i])[1:6]
synt <- sapply(object$synt_data, function(s) summary(s[,i]))
qbar <- rowMeans(synt)
bm <- apply(synt, MARGIN=1, sd)
# res_tmp <- data.frame(cbind(orig, qbar, bm, synt[,1:n_print]))
# names(res_tmp) <- c("Orig.", "Q_bar", "B_m", paste0("Synt.", 1:n_print))
res_tmp <- data.frame(cbind(orig, qbar, bm, synt))
names(res_tmp) <- c("Orig.", "Q_bar", "B_m", paste0("Synt.", 1:ncol(synt)))
results$continuous[[names(object$orig_data)[i]]] <- res_tmp
}
## categorical
results$categorical <- list()
for(i in which(is_X)){
orig <- table(object$orig_data[,i])
synt <- sapply(object$synt_data, function(s) table(s[,i]))
qbar <- rowMeans(synt)
res_tmp <- data.frame(cbind(orig, qbar, synt))
names(res_tmp) <- c("Orig.", "Q_bar", paste0("Synt.", 1:ncol(synt)))
results$categorical[[names(object$orig_data)[i]]] <- res_tmp
}
## print
cat("Number of synthetic data sets:", length(object$synt_data), "\n")
cat("Data size:", nrow(object$orig_data), "obs. of", ncol(object$orig_data), "variables \n")
cat("-------------------------------------------\n")
cat("Continuous:", names(object$orig_data)[is_Y], "\n")
print(lapply(results$continuous, function(sss) sss[,1:(3+n_print)]))
cat("-------------------------------------------\n")
cat("Categorical:", names(object$orig_data)[is_X], "\n")
print(lapply(results$categorical, function(sss) sss[,1:(2+n_print)]))
}
print.readData_passed <- function(x, ...){
## print readData results
cat("Original data: ", x$n_sample, "obs. of", (x$p_Y+x$p_X), "variables \n")
print(head(x$orig_data))
cat("-------------------------------------------\n")
cat("Number of missing values:\n")
cat(" Continiuous: \n ")
print(colSums(x$NA_Y_mat))
cat("\n Categorical: \n ")
print(colSums(x$NA_X_mat))
}
.Beta_cube_fn = function(data_obj, model_obj){
Beta_with_std_y = model_obj$.Beta_cube
p_y = dim(model_obj$.Beta_cube)[[1]] ; p_x_star = dim(model_obj$.Beta_cube)[[2]] ; R = dim(model_obj$.Beta_cube)[[3]]
mean_y = data_obj$mean_Y_input ; sd_y = data_obj$sd_Y_input
Beta_orig = Beta_with_std_y
for ( r in 1:R ){
l = 1
Beta_orig[,l,r] = mean_y + sd_y * Beta_with_std_y[,l,r] # mean_y should be included. it is tested
for ( l in 2:p_x_star){
Beta_orig[,l,r] = sd_y * Beta_with_std_y[,l,r]
} # for (l)
} # for (r)
return(Beta_orig)
} # Beta_cube_orig_scale
plot.synMicro_object <- function(x, vars, plot_num=NULL, ...){
### Check input objects ####
if(is.numeric(vars)) vars <- names(x$orig_data)[vars] ## index -> variable names
if(!all(vars %in% names(x$orig_data))) stop("invalid variable names") ## variable name error
var_y <- names(x$orig_data)[sapply(x$orig_data, class)=="numeric"]
var_x <- names(x$orig_data)[sapply(x$orig_data, class)=="factor"]
y_test <- vars %in% var_y
x_test <- vars %in% var_x
if(all(any(y_test),any(x_test))) stop("Only one can be selected between categorical and continuous")
if(!is.null(plot_num)){ ## plot num: number of synthetic data...
if(!(plot_num %in% 1:length(x$synt_data))) stop("invalid synthetic data number")
## continuous-type
if(any(y_test)){
switch(sum(y_test),
'1'={
plot_data <- data.frame(Original=x$orig_data[,vars], Synthetic = x$synt_data[[plot_num]][,vars])
boxplot(plot_data, main=paste0(vars, ": Synthetic ", plot_num), col=c("grey", "royalblue"), lwd=2, ylim = range(plot_data))
},
'2'={
plot(formula(paste0(vars, collapse="~")), data=x$orig_data, pch=16, col="grey", main=paste0("Synthetic ", plot_num))
points(formula(paste0(vars, collapse="~")), data=x$synt_data[[plot_num]], col="royalblue")
legend("topright", c("Original", "Synthetic"), pch=c(16, 1), col = c("grey", "royalblue"))
},
{stop("only one plot can be output")})
}
if(any(x_test)){
if(sum(x_test)==1){
plot_data <- t(cbind(table(x$orig_data[,vars]),table(x$synt_data[[plot_num]][,vars])))
barplot(plot_data,
main=paste0(vars, ": Synthetic ", plot_num),
beside = TRUE, col = c("grey", "royalblue"), ylim = c(0, max(plot_data)*1.5),
legend.text = c("Original", "Synthetic"),
args.legend = list(bty="n"))
}else{
stop("only one plot can be output")
}
}
}else{
## continuous-type ####
if(any(y_test)){
if(sum(y_test)==1){ ## box-plot: one variable
synth_data <- data.frame(sapply(x$synt_data, function(s) s[vars]))
for(m_rep in 1:length(x$synt_data)){
readline(prompt="Press [enter] to continue...")
plot_data <- data.frame(Original=x$orig_data[,vars], Synthetic = synth_data[,m_rep])
boxplot(plot_data, main=paste0(vars, ": Synthetic ", m_rep), col=c("grey", "royalblue"), lwd=2, ylim = range(x$orig_data[,vars],synth_data))
}
}else{ ## scatter plot: more than 2
var_list <- apply(combn(vars, 2), MARGIN=2, function(vvv) paste0(vvv, collapse = " ~ "))
for(var_rep in 1:length(var_list)){
for(m_rep in 1:length(x$synt_data)){
readline(prompt="Press [enter] to continue...")
plot(formula(var_list[var_rep]), data=x$orig_data, pch=16, col="grey", main=paste0("Synthetic ", m_rep))
points(formula(var_list[var_rep]), data=x$synt_data[[m_rep]], col="royalblue")
legend("topright", c("Original", "Synthetic"), pch=c(16, 1), col = c("grey", "royalblue"))
}
}
}
}
## categorical-type ####
if(any(x_test)){
for(var_rep in vars){
synth_data <- sapply(x$synt_data, function(s) table(s[,var_rep]))
for(m_rep in 1:length(x$synt_data)){
readline(prompt="Press [enter] to continue...")
barplot(t(cbind(table(x$orig_data[,var_rep]), synth_data[,m_rep])), main=paste0(var_rep, ": Synthetic ", m_rep),
beside = TRUE, col = c("grey", "royalblue"), ylim = c(0, ceiling(max(synth_data, table(x$orig_data[,var_rep]))*1.5)),
legend.text = c("Original", "Synthetic"),
args.legend = list(bty="n"))
}
}
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.