## inst/tests/RNG-mixed.R

## TO BE TRASHED
rm(list=ls())
fn.gambin <- function(N=1e4, corr, g.alpha, g.beta, b.prob, center.X=FALSE, ...){
  stopifnot(is.matrix(corr), all(dim(corr)==2))
  ## Gaussian copula: correlated normals -> uniforms -> gamma and Bernoulli margins
  # z <- t(chol(corr))%*%matrix(rnorm(2*N), nrow=2)  # equivalent Cholesky route
  z <- mvtnorm::rmvnorm(n=N, mean=c(0,0), sigma=corr)
  u <- pnorm(z)
  
  # rvgamma <- qnorm(u[,1], 0, 1)  # plain normal margin, for comparison
  rvgamma <- qgamma(u[,1], shape=g.alpha, rate=g.beta)
  rvbern <- as.numeric(u[,2] > b.prob)  # equals 1 with probability 1 - b.prob
  X <- cbind(rvgamma, rvbern)
  if(center.X) X <- scale(x=X, center=center.X)
  return(X)
}

r = 0.8 # correlation coefficient
sigma = matrix(c(1,r,r,1), ncol=2)
s = chol(sigma)   # upper-triangular R with t(R) %*% R = sigma
n = 10000
z = t(s) %*% matrix(rnorm(n*2), nrow=2)  # lower-triangular factor, so cov(z) = sigma
u = pnorm(z)

age = qgamma(u[1,], 15, 0.5)
age_bracket = cut(age, breaks = seq(0, max(age) + 5, by = 5))  # pad the last break so max(age) is kept
success = as.numeric(u[2,] > 0.4)  # equals 1 with probability 0.6
mat <- cbind(age, success)

dat <- fn.gambin(corr=sigma, g.alpha=15, g.beta=0.5, b.prob=0.4)

cor(mat)
colMeans(mat)

rm(list=ls())
fn.norbin <- function(N=1e4, pars=c(1,-1), corr, center.X=FALSE, ...){
  np <- length(pars) 
  stopifnot(is.matrix(corr), np==2)
  ## Gaussian copula: correlated normals -> uniforms -> normal and Bernoulli margins
  z <- mvtnorm::rmvnorm(n=N, mean=c(0,0), sigma=corr)
  u <- pnorm(z)
  
  rvnorm <- qnorm(u[,1])
  # rvlogis <- qlogis(u[,2])
  rvbeta <- qbeta(u[,2], shape1=1, shape2=1)  # Beta(1,1) is uniform, so rvbeta == u[,2]
  # rvbern <- qbinom(u[,2], size=1, prob=b.prob)
  rvbern <- as.numeric(rvbeta > 0.5)  # Bernoulli(0.5) margin by thresholding
  X <- cbind(rvnorm, rvbern)
  if(center.X) X <- scale(x=X, center=center.X)
  return(X)
}

rmat <- diag(2)
r <- 0.5
rmat[1,2] <- rmat[2,1] <- r
dat <- fn.norbin(corr=rmat)
colMeans(dat)
cor(dat)


## using the idea of a copula
## http://www.r-bloggers.com/copulas-made-easy/

## using the R package `copula`:
## http://stackoverflow.com/questions/10535235/generate-correlated-random-numbers-from-binomial-distributions-in-r
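
## A minimal sketch of the copula-package route referenced above (assumes the
## `copula` package is installed; the gamma and Bernoulli margins simply mirror
## the example above and the variable names age2/success2 are illustrative):
## draw uniforms from a Gaussian copula, then push them through the inverse CDFs.
nc <- copula::normalCopula(param = 0.5, dim = 2)
uu <- copula::rCopula(10000, nc)
age2 <- qgamma(uu[,1], shape = 15, rate = 0.5)     # gamma margin
success2 <- qbinom(uu[,2], size = 1, prob = 0.4)   # Bernoulli(0.4) margin
cor(age2, success2)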



## references
## http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.150.322&rep=rep1&type=pdf
## Cario and Nelson (1997) http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.48.281&rep=rep1&type=pdf
## http://www4.ncsu.edu/~pfackler/randcorr.ps
# http://web.ics.purdue.edu/~hwan/IE680/Lectures/Chap08Slides.pdf
# http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.28.8743&rep=rep1&type=pdf

# This reference is not working ::  http://grokbase.com/t/r/r-help/114rwzfaq4/r-random-normal-variable-correlated-to-an-existing-binomial-variable
# http://stats.stackexchange.com/questions/12857/generate-random-correlated-data-between-a-binary-and-a-continuous-variable

## using library(copula)
# http://stackoverflow.com/questions/10535235/generate-correlated-random-numbers-from-binomial-distributions-in-r

# http://grokbase.com/t/r/r-help/05435gc7c5/r-generating-a-binomial-random-variable-correlated-with-a-normal-random-variable



rm(list=ls())
## variant of fn.norbin: draw the Bernoulli margin stochastically (rbinom with a
## success probability driven by the correlated uniform) instead of thresholding
fn.norbin <- function(N=1e4, pars=c(1,-1), corr, center.X=FALSE, ...){
  np <- length(pars) 
  stopifnot(is.matrix(corr), np==2)
  z <- mvtnorm::rmvnorm(n=N, mean=c(0,0), sigma=corr)
  u <- pnorm(z)
  
  rvnorm <- qnorm(u[,1])
  rvlogis <- qlogis(u[,2])
  # rvbern <- qbinom(u[,2], size=1, prob=b.prob)
  ## plogis(rvlogis) == u[,2], so each success probability is the correlated uniform
  rvbern <- rbinom(n=length(rvlogis), size=1, prob=plogis(rvlogis))
  X <- cbind(rvnorm, rvbern)
  if(center.X) X <- scale(x=X, center=center.X)
  return(X)
}

rmat <- diag(2)
r <- 0.5
rmat[1,2] <- rmat[2,1] <- r
dat <- fn.norbin(corr=rmat)
colMeans(dat)
cor(dat)




n <- 1e3 # how many variates

# your binomial variate (example)
size <- 1
prob <- 0.3
vecB <- rbinom(n, size = size, prob = prob)

rho <- 0.5 # desired cor
m <- 0 # mean and sd of Gaussian
sig <- 1
rho <- 2*sin(rho*pi/6) # small correction: the Gaussian (Pearson) correlation that yields rank correlation rho after the margin transforms
C <- matrix(rho, nrow = 2, ncol = 2)
diag(C) <- 1
C <- chol(C)

# (1) transform binomial to Gaussian via the distributional transform
#     (pbinom(vecB - 1, ...) + dbinom(vecB, ...)*runif(n) is uniform on (0,1);
#      pbinom(vecB, ...) alone hits 1 and qnorm() of it is Inf)
X1 <- qnorm(pbinom(vecB - 1, size = size, prob = prob) +
              dbinom(vecB, size = size, prob = prob) * runif(n))

# (2) create another Gaussian
X2 <- rnorm(n)
X <- cbind(X1,X2)
# (3) induce correlation (does not change X1)
X <- X %*% C
# (4) make uniforms
U <- pnorm(X)
# (5) ... and put them into the inverses
vecB1 <- qbinom(U[,1],size,prob)
vecG <- qnorm(U[,2], mean = m, sd = sig)

# check
# plot(vecB1,vecG)
cor(vecB1,vecG)
all.equal(vecB1,vecB)
sd(vecG)

############################

rm(list=ls())
## weighted-sampling construction: place the r Bernoulli successes preferentially
## at the larger (sorted) values of Y; larger C gives a stronger induced correlation
C <- 5.21
X0 <- rbinom(100, 1, 0.5)
r <- sum(X0)                   # number of successes to place
Y <- rnorm(100)
Y <- sort(Y)
p0 <- ((1:100) - 0.5)/100      # plotting positions of the sorted Y
L <- log(p0/(1 - p0))          # logits, increasing in Y
p <- exp(C*L)
p <- p/(1 + p)                 # sampling weights, increasing in Y
ix <- sample(1:100, r, replace=FALSE, prob=p)
X1 <- numeric(100)
X1[ix] <- 1
corrs <- cor(X1, Y)
corrs



## simulation check of a weighted-sampling variant (cumulative weights) for X1
rm(list=ls())

ci <- numeric(1000)
mi <- numeric(1000)
for(i in 1:1000){
  X0 <- rbinom(1000, 1, 0.5)
  Y <- sort(rnorm(1000))
  p0 <- ((1:1000) - 0.5)/1000
  p0 <- p0/sum(p0)
  p <- cumsum(p0)             # sampling weights increasing with the rank of Y
  r <- sum(X0)
  ix <- sample(1:1000, r, replace=FALSE, prob=p)
  X1 <- numeric(1000)
  X1[ix] <- 1
  ci[i] <- cor(X1, Y)
  mi[i] <- mean(X1)
}
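
## summarise the replicates (a small added check): average induced correlation
## and average Bernoulli mean across the 1000 simulations
c(cor = mean(ci), mean.X1 = mean(mi))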

X <- rnorm(100)
Y <- rnorm(100)
r <- 0.7
# r <- 2*sin(r*pi/6) # a small correction
Y1 <- X*r + Y*sqrt(1 - r^2)
cor(X, Y1)        # correlated normals (equivalent to the 2x2 Cholesky construction)
cor(X > 0.84, Y1) # Method I: dichotomize X near its 80th percentile (qnorm(0.8) ~ 0.84)






################################################################################
##
## Dichotomous approach
## 
rm(list=ls())
## note: this approach cannot control the success probability of the Bernoulli
## variable -- the threshold is chosen to hit the target correlation, which in
## turn fixes the probability (see the sketch after this block)
rho <- 0.5
m <- 0
s <- 1
## fn(x): with mm=0, ss=1 this is the point-biserial correlation between
## Z ~ N(0,1) and the indicator 1(Z > x), minus the target rho
fn <- function(x, mm, ss, rho){
  val <- exp(-x^2/2)/sqrt(2*pi*pnorm(x, mean=mm, sd=ss)*(1-pnorm(x, mean=mm, sd=ss))) - rho
  return(val)
}
root <- uniroot(fn, mm=m, ss=s, rho=rho, lower=0, upper=3)$root  # threshold giving correlation rho
root
x <- rnorm(1000)
y <- as.numeric(x>root)
cor(x,y)
mean(x)
mean(y)
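
## Sketch of the flip side (added illustration, using the same point-biserial
## identity as fn above; the names p, thr, rho.implied are mine): if the threshold
## is instead fixed at qnorm(1 - p) so that the Bernoulli mean is p, the correlation
## can no longer be chosen -- it is forced to dnorm(qnorm(1 - p))/sqrt(p*(1 - p)).
p <- 0.3
thr <- qnorm(1 - p)
rho.implied <- dnorm(thr)/sqrt(p*(1 - p))
x2 <- rnorm(1e5)
y2 <- as.numeric(x2 > thr)
c(implied = rho.implied, empirical = cor(x2, y2), mean.y = mean(y2))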


## method 2 
require(mvtnorm)
sigm=matrix(c(0.12, 0.05, 0.02, 0.00,
              0.05, 1.24, 0.38, 0.00,
              0.02, 0.38, 2.38, 0.03,
              0.00, 0.00, 0.03, 0.16),
            ncol=4, byrow=TRUE)


mu=rep(0,4)

#simulated data
dat1 = rmvnorm(1000,mean=mu,sigma=sigm)

#difference between sigmas before dichotomizing/categorizing
sigm-cov(dat1)
#difference between means before dichotomizing/categorizing
means1=apply(dat1,2,mean)
mu-means1

#dichotomization and categorization
#let's dichotomize the third variable
#I want to keep the mean at 0.50
dat2=dat1
dat2[,3]=ifelse(dat1[,3]>0.0, 0, 1)   # indicator of X3 <= 0, so its mean is about 0.50

means2=apply(dat2,2,mean)
mu-means2
# I kept the mean the same, but look at the difference in the cov matrices
sigm-cov(dat2) 
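
## Added check (sketch), using the standard identity for a mean-zero normal pair:
## cov(Xj, 1(X3 <= 0)) = -sigm[j,3]*dnorm(0)/sqrt(sigm[3,3]), so the third column
## of the covariance matrix shrinks by the factor dnorm(0)/sqrt(sigm[3,3]);
## the diagonal entry becomes the Bernoulli(0.5) variance 0.25.
theo.col3 <- -sigm[,3]*dnorm(0)/sqrt(sigm[3,3])
theo.col3[3] <- 0.25
cbind(theoretical = theo.col3, empirical = cov(dat2)[,3])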
