R/mergeSplit.R

#K = length of configuration at current step

pX <- function(comp,p){
  sum(sapply(comp, function(x) length(x$X)==p))
}

pY <- function(comp,p){
  sum(sapply(comp, function(x) length(x$Y)==p ))
}

nComp <- function(comp){
  sum(sapply(comp, function(x) length(x$X)>0))
}

outX <- function(comp) {
  sum(table(sapply(comp, function(x) x$X))==0)
}

outY <- function(comp) {
  sum(sapply(comp, function(x) length(x$Y)>=1 & length(x$X)==0))
}

lambda1 <- function(m, p, K, nComp, pComp, merge=0) { 
  #if merge=1, outX and pComp calc before move, nComp calc after move
  #if merge=0, pComp calc after move, outX and nComp calc before move
  #K is initial configuration. Needs to be set large enough so differences aren't negative!
  (
    ( (m+merge) * (nComp + merge) ) / 
     ( (K - pComp) * (p - m + 1 - merge) )
   ) ^ ((-1)^merge)
}

logLambda1 <- function(m, p, K, nComp, pComp, merge=0) { 
  #if merge=1, outX and pComp calc before move, nComp calc after move
  #if merge=0, pComp calc after move, outX and nComp calc before move
  #K is initial configuration. Needs to be set large enough so differences aren't negative!
  #nComp = num components with more than 0 X's and more than 0 Y's (all eligible to be split, essentially)
  #pComp = num of fully saturated Y components (all X's included)
  (
    ( log(m+merge) + log(nComp + merge) ) - 
      ( log(K - pComp) + log(p - m + 1 - merge) )
  ) * ((-1)^merge)
}

# compDens <- function(m,n, h, h0, beta0, alpha0, nu, sigma02, N, X, Y){
#   C <- colSums(X)
#   h0_term <- h0/(N*h0 + 1)
#   
#   A <- n * crossprod(X) - n * h0_term * tcrossprod(C)
#   diag(A) <- diag(A) + 1/h
#   
#   V_part <- alpha0/h0 + colSums(Y)
#   Vh0 <- h0_term * sum(V_part)
#   V <- beta0/h + rowSums(crossprod(X,Y)) - Vh0 * colSums(X)
#   
#   Omega <- sum(beta0^2)/h + sum(alpha0^2)/h + sum(Y^2) - h0_term * sum((V_part)^2) - crossprod(V, solve(A,V))
#   
#   dens <- nu^sigma02 / (nu + 0.5 * Omega)^(sigma02 + N*n * 0.5) * 
#     gamma(sigma02 + N * n *0.5)/ gamma(sigma02) * 
#     (2*pi)^(-n* N * 0.5) * 
#     h^(-m * 0.5) * 
#     (N*h0 +1)^(-n * 0.5) * 
#     det(A)^(-0.5)
#   return(dens)
# }
# 
# logCompDens <- function(m,n, h, h0, beta0, alpha0, nu, sigma02, N, X, Y){
#   if(!is.matrix(Y)) Y <- matrix(Y, nrow=N, ncol=n)
#   if(!is.matrix(X)) X <- matrix(X, nrow=N, ncol=m)
# 
# 
#   h0_term <- h0/(N*h0 + 1)
#   V_part <- alpha0/h0 + colSums(Y)
# 
#   if(dim(X)[2]>1) {
#     C <- colSums(X)
#     A <- n * crossprod(X) - n * h0_term * tcrossprod(C)
#     diag(A) <- diag(A) + 1/h
#     logDetA <- determinant(A)$modulus
# 
#     V <- beta0/h + rowSums(crossprod(X,Y)) - h0_term * sum(V_part) * C
# 
#     Omega <- sum(beta0^2)/h + sum(alpha0^2)/h0 + sum(Y^2) - h0_term * sum((V_part)^2) - crossprod(V, solve(A,V))
# 
#   } else {
#     logDetA <- 0
#     Omega <- sum(alpha0^2)/h + sum(Y^2) - h0_term * sum((V_part)^2)
#   }
# 
#   if(Omega<0) browser()
# 
#   ldens <- sigma02 * log(nu) -
#     (sigma02 + N * n * 0.5) * log(nu + 0.5 * Omega) +
#     lgamma(sigma02 + N * n * 0.5) - lgamma(sigma02) +
#     (-n* N * 0.5) * log(2) + (-n * N * 0.5) * log(pi) +
#     (-m * 0.5) * log(h) +
#     (-n * 0.5) * log(N*h0 + 1) +
#     (-0.5) * logDetA
#   return(ldens)
# }

# move1Prob <- function(oldComp, newComp, m, n, p, K, pComp, nComp, h, h0, beta0, alpha0, nu, sigma02, rho, invariantData,merge=0) {
#   
#   N <- invariantData$N
#   Y <- invariantData$Y
#   XcolSum <- invariantData$XcolSum
#   XcolCross <- invariantData$XcolCross
#   XYcross <- invariantData$XYcross
#   A <- invariantData$A
#   V_part <- invariantData$V_part 
#   V_part_sq <- invariantData$V_part_sq
# 
#   merge.term <- if(merge){-1} else{1}
#   # cat("\nX indices", max(newComp$X), ", ", max(oldComp$X),"\n")
# 
#   C_logLambda1( m,  p,  K,  nComp,  pComp, merge) + 
#         C_logCompDens(m - merge.term, n, h0, h, beta0, alpha0, nu, sigma02, N, 
#                         Y, newComp$Y-1, newComp$X-1, 
#                         V_part, V_part_sq,
#                         XcolCross,
#                         XcolSum,
#                         A, XYcross) - 
#         C_logCompDens(m, n, h0, h, beta0, alpha0, nu, sigma02, N, 
#                         Y, oldComp$Y-1, oldComp$X-1, 
#                         V_part, V_part_sq,
#                         XcolCross,
#                         XcolSum,
#                         A, XYcross) - 
#         n * log(rho) * merge.term
#   
# }

lambda2 <- function(m, n, n1, c, K, p1Y, merge=0) {
  (
    (n-1) * (m/2 + 1) * choose(n, n1) * choose(m, c) * 2^(m - c + 1) * (K - p1Y - merge) /
      (K + 1  - merge * 2) * (K)
  ) ^ ((-1) ^ merge)
}

logLambda2 <- function(m, n, n1, c, K, p1Y, merge=0) {
  (
    log(n-1) + log(m/2 + 1) + lchoose(n, n1) + lchoose(m, c) + log(2)*(m - c + 1) + log(K - p1Y - merge) -
      log(K + 1  - merge * 2) - log(K)
  ) * ((-1) ^ merge)
}

# move2Prob <- function(cmbComp, splitComp, K, m1, n1, m2, n2, c,  p1Y, h, h0, beta0, alpha0, nu, sigma02, rho, invariantData, merge=0) {
#   #if merging, m = m1 + m2 - c, where c is shared components (if splitting, c is zero)
#   #p1Y is the number of Y components with 1 Y variable and >=0 X variables
#   #K already has (1,0) components subtracted off
#   m <- m1 + m2 - c
#   n <- n1 + n2
#   
#   N <- invariantData$N
#   Y <- invariantData$Y
#   XcolSum <- invariantData$XcolSum
#   XcolCross <- invariantData$XcolCross
#   XYcross <- invariantData$XYcross
#   A <- invariantData$A
#   V_part <- invariantData$V_part 
#   V_part_sq <- invariantData$V_part_sq
#   
#   merge.term <- if(merge){-1} else{1}
#   # cat("\nX indices", max(splitComp[[1]]$X), ", ", max(splitComp[[2]]$X),", ", max(cmbComp$X),"\n")
#   # cat("\nY indices", max(splitComp[[1]]$Y), ", ", max(splitComp[[2]]$Y),", ", max(cmbComp$Y),"\n")
#   
#   C_logLambda2(m,  n,  n1,  c,  K,  p1Y, merge) + 
#         (C_logCompDens(m1, n1, h0, h, beta0, alpha0, nu, sigma02, N, 
#                         Y, splitComp[[1]]$Y-1, splitComp[[1]]$X-1, 
#                         V_part, V_part_sq,
#                         XcolCross,
#                         XcolSum,
#                         A, XYcross) +
#           C_logCompDens(m2, n2, h0, h, beta0, alpha0, nu, sigma02, N, 
#                         Y, splitComp[[2]]$Y-1, splitComp[[2]]$X-1, 
#                         V_part, V_part_sq,
#                         XcolCross,
#                         XcolSum,
#                         A, XYcross) -
#           C_logCompDens(m, n, h0, h, beta0, alpha0, nu, sigma02, N, 
#                        Y, cmbComp$Y-1, cmbComp$X-1, 
#                        V_part, V_part_sq,
#                        XcolCross,
#                        XcolSum,
#                        A, XYcross) +
#         log(rho) * (m1 * n1 + m2 * n2 - m * n)) * merge.term
# }

merge1 <- function(K, nC, pC, p, oldComp, priors, invariantData, temp=1) {
  newComp <- oldComp
  
  mXout <- length(newComp$Xout)
  
  mX  <- if(mXout > 0) {
    newComp$Xout[sample.int(mXout, 1)]
  } else {
    integer(0)
  }
  
  newComp$X <- sort(c(mX, newComp$X))
  newComp$Xout <- newComp$Xout[!(newComp$Xout == mX)]
  
  m <- length(oldComp$X)
  n <- length(oldComp$Y)
  
  prob <- move1Prob(oldComp, newComp, m, n, p, K, pC, nC, priors,  invariantData, merge=1)

  return(list(prob=prob, newComp=newComp))
}

split1 <- function(K, nC, pC, p, oldComp, priors, invariantData, temp=1) {
  
  newComp <- oldComp
  m <- length(oldComp$X)
  n <- length(oldComp$Y)
  
  sX <- oldComp$X[sample.int(m,1)]
  newComp$Xout <- sort(c(newComp$Xout, sX))
  newComp$X <- newComp$X[!(newComp$X == sX)]
  
  
  prob <- move1Prob(oldComp = oldComp, newComp = newComp, m=m, n=n, p=p, K=K, 
                    pComp = pC, nComp=nC, priors=priors,  
                    invariantData = invariantData, merge=0)
  return(list(prob=prob, newComp=newComp))
}

move1 <- function(comp, priors, invariantData, compCount, temp=1) {
  
  K   <- compCount$K
  nC <- compCount$nC
  pXcount <- compCount$pXcount
  
  Q <- invariantData$Q
  p <- invariantData$P
  
  merge <- rbinom(n=1, size=1, prob=0.5)
  merge <- ifelse((merge|nC==0) & K>pXcount, 1, 0)
  
  accept <- 0
  sel <- out <- NULL
  
  if(merge) {
    candidates <- which(sapply(comp, function(x) length(x$X)<p))
    
    if(length(candidates) != K-pXcount){
      pXcount <- K - length(candidates)
      compCount$pXcount <- pXcount
    }
    
    sel <- candidates[sample.int(length(candidates), 1)]
    
    out <- merge1(K, nC, pXcount, p, comp[[sel]], priors, invariantData, temp)
    
  } else {
    candidates <- which(sapply(comp, function(x) length(x$X)>0))
    
    if(length(candidates) != nC){
      nC <- length(candidates)
      compCount$nC <- nC
    }
    
    sel <- candidates[sample.int(length(candidates), 1)]

    out <- split1(K, nC, pXcount, p, comp[[sel]], priors, invariantData, temp)
    
  }

  # if(!is.numeric(out$prob) | is.na(out$prob) | is.nan(out$prob)){
  #   # browser()
  #   saves <- as.list(environment())
  #   save(saves, file=paste0(paste("../Data/Outputs/EMERGENCYDUMP_pollution_atleast.RData")))
  #   save(saves, file=paste0(paste("../Data/Outputs/EMERG_pollution","jobid",job.id,"month",month.sample, "season",season.sample, "east", east.sample, "sulfate",sulfate.sample, "iter",n.iter,"chains",n.chain, "chain",array.num,Sys.Date(), sep="_"),".RData"))
  # }
  

  if(-rexp(1) <= out$prob) {
    if(merge) {
      if(length(out$newComp$X)==p) {
        # browser()
        compCount$pXcount <- pXcount + 1
      }
      if(length(comp[[sel]]$X)==0) {
        # browser()
        compCount$nC <- nC + 1
      }
    } else {
      if(length(comp[[sel]]$X)==p) {
        # browser()
        compCount$pXcount <- pXcount - 1
      }
      if(length(out$newComp$X)==0) {
        # browser()
        compCount$nC <- nC - 1
      }
    }
    comp[[sel]] <- out$newComp
    accept <- 1
  }
  
  # if(compCount$pXcount != pX(comp,p) | compCount$p1Y != pY(comp,1)| compCount$nC != nComp(comp)| compCount$K != length(comp)) {
  #   saves <- as.list(environment())
  #   save(saves, file=paste0(paste("../Data/Outputs/EMERGENCYDUMP_pollution_atleast.RData")))
  #   save(saves, file=paste0(paste("../Data/Outputs/EMERG_pollution","jobid",job.id,"month",month.sample, "season",season.sample, "east", east.sample, "sulfate",sulfate.sample, "iter",n.iter,"chains",n.chain, "chain",array.num,Sys.Date(), sep="_"),".RData"))
  #   q("no")
  # }
  
  return(list(comp=comp, accept=accept, compCount = compCount))
}

split2 <- function(K, oldComp, p1Y, priors, invariantData, temp=1) {
  n <- length(oldComp$Y)
  m <- length(oldComp$X)
  
  
  newComp <- lapply(1:2, function(i) list(Y=NULL, X=NULL, Xout=NULL))
  
  n1 <- sample.int(n-1, 1)
  newY <- sort(sample.int(n, n1))
  newComp[[1]]$Y <- unique(oldComp$Y[newY])
  
  newComp[[2]]$Y <- oldComp$Y[-newY]
  
  if(m>0){
    cuppr <- if( (m %% 2)!=0) { (m-1)/2 } else {m/2}
    if(cuppr>0) {
      c <- sample.int(cuppr,1)
      sharedXnum <- sort(sample.int(m, c))
      splitX <- oldComp$X[-sharedXnum]
      sharedX <- oldComp$X[sharedXnum]
    } else{
      c <- 0
      sharedX <- integer(0)
      splitX <- oldComp$X
    }
    stay <- rbinom(m-c, size=1, prob=0.5)
    changeX <- splitX[!stay]
    stayX <- splitX[stay==1]
    
    newComp[[1]]$X <- sort(unique(c(changeX, sharedX)))
    newComp[[1]]$Xout <- sort(unique(c(stayX, oldComp$Xout)))
  
    newComp[[2]]$X <- sort(unique(c(stayX, sharedX)))
    newComp[[2]]$Xout <- sort(unique(c(oldComp$Xout, changeX)))
  
    m2 <- sum(stay) + c
    m1 <- m - m2 + c
  } else {
    c <- m2 <- m1 <- m
    newComp[[1]]$Xout <- newComp[[2]]$Xout <- oldComp$Xout
    newComp[[1]]$X <- newComp[[2]]$X <- integer(0)
  }
  n1 <- length(newY)
  n2 <- n - n1
  
  prob <- move2Prob(oldComp, newComp, K, m1, n1, m2, n2, c, p1Y, priors, invariantData, merge=0)

    return(list(prob=prob, newComp=newComp))
}

merge2 <- function(K, oldComp, p1Y, priors, invariantData, temp=1) {
  
  newComp <- list(Y=sort(unlist(lapply(oldComp, function(y) y$Y))), 
                  X=sort(unique(unlist(lapply(oldComp, function(x) x$X)))), 
                  Xout = sort(unique(unlist(lapply(oldComp, function(x) x$Xout)))))
  
  newComp$Xout <- newComp$Xout[!(newComp$Xout %in% newComp$X)]
  n  <- length(newComp$Y)
  n1 <- length(oldComp[[1]]$Y)
  n2 <- length(oldComp[[2]]$Y)
  m1 <- length(oldComp[[1]]$X)
  m2 <- length(oldComp[[2]]$X)
  
  m <- length(newComp$X)
  c <- m1 + m2 - m
  
  p1Y <- p1Y - (n1 == 1) - (n2 == 1)
  
  
  prob <- move2Prob(newComp, oldComp, K, m1, n1, m2, n2, c, p1Y, priors, invariantData, merge=1)
  
  return(list(prob=prob, newComp=newComp))
}

move2 <- function(comp, priors, invariantData, compCount, temp=1) {
  K   <- compCount$K
  Q <- invariantData$Q
  p <- invariantData$P
  
  merge <- rbinom(n=1, size=1, prob=0.5)
  accept <- 0

  p1Y <- compCount$p1Y
  

  if ((K == Q | merge) & K >1) {
    merge <- 1
    sel <- sample.int(K, 2)

    mergeout <- merge2(K=K, oldComp=comp[sel], p1Y=p1Y, priors = priors, 
                       invariantData = invariantData, temp = temp)
    
    if(-rexp(1) <= mergeout$prob) {
      compCount$K <- K - 1
      
      partP1Y <- pY(comp[sel],1)
      if(partP1Y>0){
        # browser()
        compCount$p1Y <- p1Y-partP1Y
      }
      
      if(pX(comp[sel], 0)==0) compCount$nC <- compCount$nC - 1
      
      if(length(mergeout$newComp$X)==p & pX(comp[sel],p)==0){
        compCount$pXcount <- compCount$pXcount + 1
      }
      
      comp[[sel[1]]] <- mergeout$newComp
      comp[[sel[2]]] <- NULL
      
      accept <- 1
    }
    
  } else {
    merge <- 0
    candidates <- which(sapply(comp, function(y) length(y$Y)>1))
    sel <- candidates[sample.int(length(candidates), 1)]
    split <- split2(K, comp[[sel]], p1Y, priors, invariantData, temp)
    
    if(-rexp(1) <= split$prob) {
      compCount$K <- K + 1
      
      partP1Y <- pY(split$newComp,1)
      if(partP1Y>0){
        compCount$p1Y <- p1Y + partP1Y
      }
      if(pX(split$newComp,0)==0) compCount$nC <- compCount$nC + 1
      
      if(pX(split$newComp,p)>1) compCount$pXcount <- compCount$pXcount + 1
      
      comp[[compCount$K]] <- split$newComp[[1]]
      comp[[sel]]         <- split$newComp[[2]]
      
      accept <- 1
    }
  }
  
  # if(compCount$pXcount != pX(comp,p)| compCount$p1Y != pY(comp,1)| compCount$nC != nComp(comp)| compCount$K != length(comp)) {
  # 
  #   saves <- as.list(environment())
  #   save(saves, file=paste0(paste("../Data/Outputs/EMERGENCYDUMP_pollution_atleast.RData")))
  #   save(saves, file=paste0(paste("../Data/Outputs/EMERG_pollution","jobid",job.id,"month",month.sample, "season",season.sample, "east", east.sample, "sulfate",sulfate.sample, "iter",n.iter,"chains",n.chain, "chain",array.num,Sys.Date(), sep="_"),".RData"))
  #   q("no")
  # }
    
  return(list(comp=comp, accept=accept, compCount = compCount))
}
eifer4/stochasticSampling documentation built on May 14, 2019, 11:16 a.m.