pcm2data <- function(pcm, n=10000)
{
N <- ncol(pcm)
# cholesky decomposition of the correlation matrix.
cm.chol <- chol(pcor2cor(pcm))
#normal with mu = 0, sd = sd
z <- matrix(rnorm(n*N),ncol = n)
# Generate the data by multiplying the cholesky decomposed matrix
# to the random matrix z
y <- t(t(cm.chol) %*% z)
scale(y)
}
clean <- function(x, y = NULL, itself = TRUE)
{
# clean a set of variables (x) from the variance shared with
# another set of variables (y). If itself = TRUE, the x variables
# are also cleaned from each other.
# if y = NULL, cleans only the x variables from one another.
if(is.null(dim(x)[2])) x <- matrix(x, ncol=1)
# clean from y
if(!is.null(y))
{
if(is.null(dim(y)[2])) y <- matrix(y, ncol=1)
y <- y[,!apply(apply(y, 2, is.na),2,any)] #remove NA columns from Y
fit <- lm(x~y)
x_pure <- fit$residuals
} else x_pure <- x
# clean from themselves, using a recursive call
if(itself & ncol(x) > 1)
{
for(i in 1:ncol(x))
{
x_pure[,i] <- clean(x = x_pure[,i], y = x_pure[,-i], itself = FALSE)
}
}
x_pure
}
Clone <- function(pcm, assi, n = 1000, furthersharedvar = 1, rndvar = 0, p = 1, keeporig = FALSE)
{
N <- ncol(pcm)
if(length(assi)!= N) stop("The assignment entered is not compatible with the size of the correlation matrix")
# generate an n*N data matrix encoding the same correlation structure
y <- pcm2data(pcm, n)
output <- list()
output$orig <- y
N2 <- sum(assi)
origvar <- 1-rndvar
assignments <- c()
for(i in 1:length(assi)) assignments <- c(assignments, rep(i, assi[i]))
y.cloned <- matrix(ncol = sum(assi), nrow = n)
if(keeporig)
{
orig <- 1
for(i in 1:(length(assi)-1)) orig[i+1] <- sum(assi[1:i])+1
}
for(i in 1:N)
{
# decompose the variance of the original node in two parts, the fitted
# values of variance R^2 and the residuals, of variance 1-R^2
set <- 1:N
fit <- lm(y[,i]~cbind(y.cloned[,assignments%in%set[set<i]], y[,set>i]))
fitval <- fit$fitted.values
residualval <- fit$residuals
R2 <- var(fitval)
if(p == 1)
{
fitmat <- matrix(rep(fitval, assi[i]), ncol = assi[i], nrow = n, byrow = FALSE)
} else
{
fitmat <- matrix(ncol = assi[i], nrow = n)
for(j in 1:assi[i])
{
# identify the subset of predictors, given p
set <- (1:N)[-i]
select <- sample(c(TRUE, FALSE), size = length(set), replace = TRUE, prob= c(p, 1-p))
subset <- set[select]
# ensure that at least one predictor is preserved
if(length(subset) == 0) subset <- sample(set, 1)
# decompose the variance of the original node in two parts, the fitted
# values of variance R^2 and the residuals, of variance 1-R^2
fit <- lm(y[,i]~cbind(y[,subset[subset>i]], y.cloned[,assignments%in%subset[subset<i]]))
fitmat[,j] <- fit$fitted.values
}
# the maximum theoretical variance in fitmat is R2. To make all the
# clones have the same variance, the variance of all the columns of
# fitmat is transformed into R2, by adding a random component to each,
# of variance = "extravariance".
tol = 1e-15
extravariance <- R2 - apply(fitmat,2,var)
tocorrect <- (1:length(extravariance))[extravariance > tol]
if(length(tocorrect) > 0)
{
extravariance <- extravariance[tocorrect]
extraSDs <- sqrt(extravariance)
rndmat <- matrix(rnorm(n*length(tocorrect)), ncol=length(tocorrect), nrow = n)
rndmat <- clean(rndmat, cbind(y[,set>=i], y.cloned[,assignments < i], fitmat))
rndmat <- scale(rndmat)[,]
rndmat <- t(t(rndmat)*extraSDs)
fitmat[,tocorrect] <- fitmat[,tocorrect] + rndmat
}
}
# Further decompose the residual variance
# A portion of the residual variance of furthersharedvar
# is kept intact in the clones of the same node
intactresiduals <- residualval*sqrt(furthersharedvar)
# While another portion is replaced. The random variable "newresiduals"
# is obtained by replacing the remaining residual variance with a new
# component of variance, shared by all nodes
newresiduals <- matrix(rnorm(n*assi[i]), ncol = assi[i])
set <- 1:N
newresiduals <- scale(clean(newresiduals, cbind(y[,set>=i], y.cloned[,assignments < i], fitmat),
itself = TRUE))
newresiduals <- newresiduals*sqrt(1-furthersharedvar)*sqrt(1-R2)
# Assemble the clones of the ith node
y.cloned[,assignments == i] <- (fitmat + intactresiduals + newresiduals)
# if the original node should be kepts, include it
if(keeporig) y.cloned[,orig[i]] <- y[,i]
}
# rndvar: completely random noise, different for each node
unique <- scale(matrix(rnorm(n*N2), ncol = N2))
y.cloned <- scale(y.cloned)*sqrt(origvar) + unique*sqrt(rndvar)
output$cloned <- scale(y.cloned)
output
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.