########################################
##########################################
# Version 10 is the same as Version 9
# Version 6.3: modify MaskDensity6.2
# Version 6.2 does not work for categorical variable
# neworder in function"findOrder_Rmask" is also changed
#
# let a and b decide by data agency as well as a and b determined by alpha
#
# This version is modified based on MaskDensity11.R
# The method of estimating the mass function of a categorical variable
# is changed.
#############################################
###########################################
# In this version of MaskDensity, the value of CORmax is reported.
# The value will be used to show the performance of the approximant of density function.
encriptNoise <- function(noise, a, b, maxorder, levels, EPS, noisefile)
{
lvls <- levels
save(a, b, maxorder, lvls, noise, EPS, file=noisefile)
}
calc_muX <- function (muY, a, b)
{
muX <- 0 * muY
muX[1] <- 1
for (j in 1:(length(muY) - 1)) {
k <- 0:j
muX[j + 1] <- sum(choose(j, k) * 2^k * muY[k + 1] *
(-1)^(j - k) * (a + b)^(j - k))/(b - a)^j
}
return(muX)
}
P_Rmask <- function (x, k)
{
SUM <- 0 * x
for (i in 0:floor(k/2)) SUM <- SUM + (-1)^i/2^k *
exp(lgamma(2 * k - 2 * i + 1) - lgamma(i + 1) -
lgamma(k - i + 1) - lgamma(k - 2 * i + 1)) * x^(k - 2 * i)
return(SUM)
}
lambda_Rmask <- function (muX, k)
{
i <- 0:floor(k/2)
val <- (2 * k + 1)/2 * sum((-1)^i/2^k * exp(lgamma(2 *
k - 2 * i + 1) - lgamma(i + 1) - lgamma(k - i + 1) -
lgamma(k - 2 * i + 1)) * muX[k - 2 * i + 1])
return(val)
}
fY_Rmask <- function(y, muY, a, b)
{
x <- (2 * y - (a + b))/(b - a)
muX <- calc_muX(muY, a, b)
SUM <- 0 * y
for (k in 0:(length(muY) - 1)) SUM <- SUM + lambda_Rmask(muX, k) * P_Rmask(x, k)
SUM[(y < a) || (y > b)] <- 0
val <- SUM * 2/(b - a)
val[is.na(val)] <- 0
return(val)
}
moments <- function (y, order = 8)
{
val <- 1
if (order > 0) for (k in 1:order)
val <- c(val, mean(y^k))
return(val)
}
density_Rmask <- function (moments, a, b, n = 512)
{
span <- 25
fYseq <- yseq<- seq(a, b, length.out = n)
for(i in 1:n) {
jseq <- i + (-span):span
if(i <= span) jseq <- 1:i
if(i > n-span) jseq <- i:n
fYseq[i] <- length(jseq)/(2*span+1) * mean(fY_Rmask(yseq[jseq], moments, a, b))
}
scale=sum(fYseq)
negative <- (1:n)[fYseq <= 0]
fYseq[negative] <- 0
output <- list(x = yseq, y = fYseq, call=sys.call())
#class(output) <- "density"
return(output)
}
findOrder_Rmask <- function(ystar, noise, a, b, maxorder=100, EPS)
{
M <- mean(noise)
ymoments <- moments(ystar/M, order=maxorder) /
moments(noise/M, order=maxorder)
#############################################
# this part is changed
#
#neworder<-NULL # the begining of changes
#for(i in 1:maxorder){
# if(is.finite(ymoments[i]))
# neworder<-i
# }
# maxorder<-neworder-1 # end of changes
#############################
# this code is new
################
neworder<-maxorder # the begining of changes
for(i in 1:maxorder+1){
if(is.infinite(ymoments[i]))
neworder<-i-1
break
}
maxorder<-neworder # end of changes
###################
C <- sample(noise, size=length(ystar), replace=TRUE)
CORsamp <- NULL
CORmax <- -Inf
for(m in 1:maxorder) {
cat("Trying", m, "moments ../\n")
Provost <- density_Rmask(ymoments[1:m], a, b)
############
#####
#
### modify here
#
##################
# if (sum(Provost$y)==0)
if (sum(is.nan(Provost$y)) >=1)
{
cat("NaN in probabilty vector../\n")
break
}
if (sum(Provost$y)==0)
{
cat("NA in probabilty vector../\n")
break
}
else
{
prob <- Provost$y/sum(Provost$y)
y1 <- sample(Provost$x, size = length(ystar), prob=prob, replace=TRUE)
y1star <- y1*C
COR <- round(10000*cor(sort(ystar), sort(y1star)))/10000
if(COR > (1+EPS)*CORmax) {
CORmax <- COR
mOPT <- m
}
}
CORsamp <- c(CORsamp, COR)
if(COR < 1 - 10*(1-CORmax)) break
}
#cat("unmask:", mOPT, "moments used.\n")
cat("unmask:", mOPT, "moments used.\n", CORmax, "correlation. \n")
return(mOPT)
}
createNoise <- function (n, mean = rep(NA, 5), sd = rep(1, length(mean)),prob = rep(1, length(mean))/length(mean))
{
k <- length(mean)
if (is.na(sum(mean)))
{
best <- rnorm(k, 500, 200)
MAX <- min(dist(best))
for (i in 1:1000) {
mean <- rnorm(k, 500, 200)
if ((min(dist(mean)) > MAX) && (min(mean) > 50)) {
MAX <- min(dist(mean))
best <- mean
}
}
mean <- best
}
C <- rnorm(n)
grp <- sample(1:length(mean), n, prob = prob, replace = TRUE)
for (i in 1:length(mean))
C[grp == i] = mean[i] + sd[i] * C[grp == i]
C[C < 0] <- -C[C < 0]
return(C)
}
######## the following mask function is modified #####
mask <- function (y, noisefile, noise = createNoise(length(y)), a1=min(y), b1=max(y),
maxorder = 100, EPS = 1e-06)
{
n <- length(y)
if(length(noise) != n) stop("'y' and 'noise' lengths differ")
if(sum(noise < 0) > 0) stop("Negative noise not allowed")
lvls <- NULL
FACTOR <- is.factor(y)
if (FACTOR)
{
k <- length(levels(y))
lvls <- levels(y)
y1 <- rep(NA, length(y))
for (i in 1:k) y1[y == lvls[i]] <- i
y <- y1
}
if (sum(noise < 0) > 0)
stop("Negative noise not allowed")
ndens <- density(noise)
prob <- ndens$y/sum(ndens$y)
ystar = y * noise
C2 <- sample(noise, size = 10*n, replace = TRUE)
if (FACTOR)
{
a <- 0
b <- k + 1
}
else {
a<- a1
b<- b1
k <- 0
}
encriptNoise(C2, a, b, maxorder, lvls, EPS, noisefile)
return(list(ystar = ystar, noisefile = noisefile))
}
####the following unmask_Rmask is modified #######
#########
# unmask_Rmask1
#########
unmask <- function (ystar, noisefile, noise)
{
load(noisefile)
outMeanOfNoise <- mean(noise)
outMeanOfSquaredNoise <- mean(noise^2)
unmask_Rmask1 <- function(ystar) #modified
{
if(!exists("noise")) stop("Problem with 'noisefile'")
if(!exists("a")) stop("Problem with 'noisefile'")
if(!exists("b")) stop("Problem with 'noisefile'")
if(!exists("maxorder")) stop("Problem with 'noisefile'")
if(!exists("EPS")) stop("Problem with 'noisefile'")
order <- findOrder_Rmask(ystar, noise, a, b, maxorder = maxorder, EPS = EPS)
M <- mean(noise)
ymoments <- moments(ystar/M, order=order) /moments(noise/M, order=order)
return(list(a=a, b=b, levels=lvls, ymoments=ymoments))
}
##########
# unmask_Rmask2
#########
unmask_Rmask2 <- function(ystar, alpha) #modified
{
if(!exists("noise")) stop("Problem with 'noisefile'")
if(!exists("a")) stop("Problem with 'noisefile'")
if(!exists("b")) stop("Problem with 'noisefile'")
if(!exists("maxorder")) stop("Problem with 'noisefile'")
if(!exists("EPS")) stop("Problem with 'noisefile'")
if (is.null(lvls)){
a1<- mean(ystar)/mean(noise)-sqrt(1/alpha)*sqrt(mean(ystar^2)/mean(noise^2)-(mean(ystar)/mean(noise))^2)
b1<- mean(ystar)/mean(noise)+sqrt(1/alpha)*sqrt(mean(ystar^2)/mean(noise^2)-(mean(ystar)/mean(noise))^2)
a<-max(a,a1)
b<-min(b,b1)
}
order <- findOrder_Rmask(ystar, noise, a, b, maxorder = maxorder, EPS = EPS)
M <- mean(noise)
ymoments <- moments(ystar/M, order=order) /moments(noise/M, order=order)
return(list(a=a, b=b, levels=lvls, ymoments=ymoments))
}
###### original code ####
# info <- unmask_Rmask(ystar, noisefile, alpha) ##### changed
# size <- length(ystar)
# a <- info$a
# b <- info$b
# lvls <- info$levels
# ymoments <- info$ymoments
# dens <- density_Rmask(ymoments, a, b)
######### end ##########
#DensM<-NULL # modified
DensA<-NULL
DensB<- NULL
#DensLvls<-NULL
sampDens<-NULL # use thid to instore the sample from the unmasked density
corMatrix<-matrix(0,6,2)
################
# Add a function for discrete case. For discrete variables, their mass functions
# are estimated from a different way.
##############
k<-length(lvls)
if (k !=0){
Ma<-seq(1,k, by=1)
MA<-Ma
Mb<-Ma
for(i in 1:(k-1)){
Mb<-Mb*Ma
MA<-cbind(MA,Mb)
}
MA<-t(MA)
Mnoise<-diag(k)
Mnoise[1,1]<-mean(noise)
noisePower<-noise
for( i in 1:(k-1)){
noisePower<-noisePower*noise
Mnoise[i+1,i+1]<-mean(noisePower)
}
MstarY<-mean(ystar)
ystarPower<-ystar
for(i in 1:(k-1)){
ystarPower<-ystarPower*ystar
MstarY<-c(MstarY, mean(ystarPower))
}
Poutput<-solve(MA)%*%solve(Mnoise)%*%MstarY
if(min(Poutput) > 0){
sizeOut<-length(ystar)
y1<-sample(Ma, size=sizeOut, replace=TRUE, prob=Poutput)
return(list(unmaskedVariable = y1, prob=Poutput, meanOfNoise = outMeanOfNoise, meanOfSquaredNoise = outMeanOfSquaredNoise))
}else{ # if matrix is singluar
############ end of modification
###############
# the method used to estimate the mass function of a categorical variable is changed.
# the old method is kept below
#############
# (from here)
#############
warning("Method of moments failed, using k-means instead",immediate. =T)
info <- unmask_Rmask1(ystar) ##### changed
size <- length(ystar)
a <- info$a
b <- info$b
ymoments <- info$ymoments
dens <- density_Rmask(ymoments, a, b)
densK<-dens # this density information is for categorical case
y1 <- sample(densK$x, size = size, replace = TRUE, prob = densK$y/sum(densK$y))
centers <- 1:k
OK <- FALSE
rmv <- NULL
repeat {
try({ out <- kmeans(y1, centers=centers[!centers %in% rmv])$cluster;
OK <- TRUE })
if(OK) {
break
}
y2 <- round(y1)
for(i in centers) {
if(sum(y2 == i) == 0) {
rmv <- c(rmv, i)
}
}
}
out <- as.factor(lvls[out])
levels(out) <- lvls
Poutput<-summary(out)/length(out)
# return(out)
return(list(unmaskedVariable = out, prob=Poutput, meanOfNoise = outMeanOfNoise, meanOfSquaredNoise = outMeanOfSquaredNoise))
}
}
else{ # continuous case
###########
# determine by a and b
#######
info <- unmask_Rmask1(ystar) ##### changed
size <- length(ystar)
a <- info$a
b <- info$b
lvls <- info$levels
ymoments <- info$ymoments
dens <- density_Rmask(ymoments, a, b)
yy1 <- sample(dens$x, size = size, replace = TRUE, prob = dens$y/sum(dens$y))
c1<-sample(noise, size=length(ystar), replace=TRUE)
yy1star<-yy1*c1
corMatrix[1,1] <- round(10000*cor(sort(ystar), sort(yy1star)))/10000
#DensM<-cbind(DensM, dens) # modeified
DensA<-c(DensA,a)
DensB<-c(DensB,b)
sampDens<-cbind(sampDens, yy1)
#######
# determined by alpha
###########
size <- length(ystar)
corMatrix[1,2]<-1
for (i in 1:5){
alpha=0.01*i
corMatrix[i+1,2]<-i+1
info <- unmask_Rmask2(ystar, alpha) ##### changed
a <- info$a
b <- info$b
lvls <- info$levels
ymoments <- info$ymoments
dens <- density_Rmask(ymoments, a, b)
yy1 <- sample(dens$x, size = size, replace = TRUE, prob = dens$y/sum(dens$y))
c1<-sample(noise, size=length(ystar), replace=TRUE)
yy1star<-yy1*c1
corMatrix[i,1] <- round(10000*cor(sort(ystar), sort(yy1star)))/10000
#DensM<-cbind(DensM, dens) # modeified
DensA<-c(DensA,a)
DensB<-c(DensB,b)
sampDens<-cbind(sampDens, yy1)
}
corMatrix<-corMatrix[order(corMatrix[,1]),] #increased based on the values of cor
selected<-corMatrix[6,2]
size <- length(ystar)
a <- DensA[selected]
b <- DensB[selected]
y1<- sampDens[, selected]
return(list(unmaskedVariable = y1, meanOfNoise = outMeanOfNoise, meanOfSquaredNoise = outMeanOfSquaredNoise))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.