R/DPbasics.R

Defines functions rDPdisc dDPdisc cdDPdisc DPdisc DPweight

Documented in cdDPdisc dDPdisc DPdisc DPweight rDPdisc

#' weight by stick-breaking process
#'
#' @param M a numeric / precision parameter
#' @param K a scalar / number of weights
#' @return  a numeric vector / weights of length K
DPweight <- function(M=1,K=10){
  vh <- rbeta(K-1,1,M)
  wh <- rep(0,K-1)
  wh[1] <- vh[1]
  stick <- 1-wh[1]
  for(i in 2:(K-1)){
    wh[i] <- vh[i]*stick
    stick <- stick-wh[i]
  }
  return(c(wh,stick))
}

#' Discrete density estimation
#'
#' @param M numeric / precision parameter
#' @param supp numeric vector / support on whcih you obtain pmf)
#' @param G0 numeric vecotr / Centering dist. (pmf on the support)
#' @param kmax scalar / number of weight to obatin pmf
#' @return a numeric vector / p.m.f on the support of input
# debugging
# M=1; supp <- seq(0,5,by=1) ; G0 <- "discrete uniform" ; kmax = 1000# dpois(x,lambda = 1)
DPdisc <- function(M=1, supp=NULL, G0=NULL, kmax=1000){
  if(is.null(supp)) supp= 1:length(G0)
  if(length(supp)==1) supp <- 1:supp
  if(is.null(G0)) G0 <- rep(1/length(supp),length(supp))
  tb <- cbind(sample(supp,kmax,replace=TRUE,prob=G0),DPweight(M=M,K=kmax))
  prob <- sapply(seq_along(supp), function (i) sum(tb[tb[,1]==supp[i],2]))
  names(prob) <- format(supp)
  prob
}




#' DP conditional distribution for discrete case
#'
#' @param y numeric vector / a sequence of y values to be evaluated
#' @param M numeric / precision parameter
#' @param supp numeric vector / support of y
#' @param G0 numeric vector / centering dist. on support y
#' @return list of one numeric vector and one scalar / conditional dist. and probability of p(yi | y_{-1})
# debugging
# y <-c(2,3,4,5,4,5,6,4,4,3,3) ; supp=NULL ; G0 =NULL ; M=1
cdDPdisc <- function(y, M=1, supp = NULL, G0 = NULL){
  n <- length(y)
  if(is.null(supp)) supp <-  unique(y)
  if(is.null(G0)) G0 <- rep(1/length(supp),length(supp))
  if(n==1){
    list(dist=1, prob=1)
  }else{
    y.old <- y[-n]
    cprob <- vapply(seq_along(supp),function(i) sum(y.old == supp[i]),FUN.VALUE = numeric(1))/(M+n-1)+M*G0/(M+n-1)
    names(cprob) <- format(supp)
    value <- cprob[supp==y[n]]
    list(dist=cprob, prob=value)
  }
}

#' DP marginal distribution for discrete case
#'
#' @param y a numeric vector / a sequence of y values to be evaluated
#' @param M a numeric / precision parameter
#' @param supp a numeric vector / support of y
#' @param G0 numeric vector / centering dist. on support y
#' @return a scalar / mariginal probabiltyprobability of p(yi | y_{-1})
# debugging
# supp <- 1:3 ; y <- sample(supp,5,replace=TRUE) ; G0 <- rdirich(1,c(1,2,3)) ; M=1
dDPdisc <- function(y, M=1, supp=NULL, G0=NULL, log = FALSE){
  if(is.null(supp)) supp <-  unique(y)
  if(is.null(G0)) G0 <- rep(1/length(supp),length(supp))
  prob <- prod(vapply(seq_along(y),FUN=function(i) cdDPdisc(y[1:i],supp=supp,M=M,G0=G0)$prob, FUN.VALUE=numeric(1)))
  if(log==TRUE){
    log(prob)
  }else{
    prob
  }
}


#' DP Random variable generator for discrete case
#'
#' @param n a scalar / number of random variables to be generated
#' @param M a numeric / precision parameter
#' @param supp a numeric vector / support of y
#' @param G0 a numeric vector / centering dist. on support y
#' @return  a numeric vector / random variables of length n
# debugging
#n=10 ; M=20 ; supp <- 3 ; G0=NULL
rDPdisc <- function(n, M=1, G0=NULL, supp=NULL){
  if(is.null(supp)) supp= 1:length(G0)
  if(length(supp)==1) supp <- 1:supp
  if(is.null(G0)) G0 <- rep(1/length(supp),length(supp))
  y <- numeric(n)
  y[1] <- sample(supp,size=1,prob = G0)
  pool <- as.vector(y[1])
  for(i in 2:n){
    if(runif(1)<(i-1)/(M+i-1)){
      y[i] <- sample(x=pool,size=1,prob=rep(1/(i-1),i-1))
    }  else {
      y[i] <- sample(x=supp,size=1,prob = G0)
    }
    pool <- c(pool,y[i])
  }
  return(pool)
}
morner75/BNP documentation built on May 29, 2020, 7:27 p.m.