R/poissonlognormal.R

Defines functions simpln dpln ldpln ldpln1 dpln1 bootstraprpln rplnmle llrplnall llrpln llpln llplnall aplnmle bspln bootstrapplnconc bootstrappln

Documented in aplnmle bootstrappln bootstrapplnconc bootstraprpln bspln dpln dpln1 ldpln ldpln1 llpln llplnall llrpln llrplnall llrplnall rplnmle rplnmle simpln

#  File degreenet/R/poissonlognormal.R
#  Part of the statnet package, http://statnet.org
#
#  This software is distributed under the GPL-3 license.  It is free,
#  open source, and has the attribution requirements (GPL Section 7) in
#    http://statnet.org/attribution
#
# Copyright 2003 Mark S. Handcock, University of California-Los Angeles
# Copyright 2007 The statnet Development Team
######################################################################
#
# Bootstrap CI for Poisson Lognormal
#
bootstrappln <- function(x,cutoff=1,cutabove=1000,
                          m=200,alpha=0.95,guess=c(0.6,1.2),
                          file="none"){
mle <- aplnmle(x=x,cutoff=cutoff,guess=guess)$theta
bmles <- rep(0,length=m)
for(i in seq(along=bmles)){
 xsamp <- sample(x,size=length(x),replace=TRUE)
 bmles[i] <- aplnmle(x=xsamp,cutoff=cutoff,guess=guess)$theta[1]
}
#
if(!missing(file)){
 save(mle,bmles,file=file)
}
c(quantile(bmles,c(0.5*(1-alpha),(1-alpha),0.5,alpha,0.5+0.5*alpha),na.rm=TRUE),mle)
}
#
# Bootstrap CI for Poisson Lognormal Conc
#
bootstrapplnconc <- function(x,cutoff=1,cutabove=1000,
                          m=200,alpha=0.95,guess=c(0.6,1.2),
                          file="none"){
mle <- aplnmle(x=x,cutoff=cutoff,guess=guess,conc=TRUE)
cmle <- mle$conc
mle <- mle$theta
bmles <- rep(0,length=m)
bconc <- rep(0,length=m)
for(i in seq(along=bmles)){
 xsamp <- sample(x,size=length(x),replace=TRUE)
 tbs <- aplnmle(x=xsamp,cutoff=cutoff,guess=mle,conc=TRUE)
 if(is.na(tbs$theta[1])){bmles[i] <- bmles[i-1]; bconc[i] <- bconc[i-1]}
 else{
  bmles[i] <- tbs$theta[1]
  bconc[i] <- tbs$conc
 }
}
#
if(!missing(file)){
 save(cmle,mle,bconc,bmles,file=file)
}
mle <- c(quantile(bconc,c(0.5*(1-alpha),(1-alpha),0.5,alpha,0.5+0.5*alpha),na.rm=TRUE),cmle,
  mean(bmles < 3.0,na.rm=TRUE))
names(mle)[7] <- "prob < 3"
mle
}
bspln <- function(x, cutoff=1, m=200, np=2, alpha=0.95, v=NULL,
                   hellinger=FALSE){
 if(missing(v)){v <- aplnmle(x=x,cutoff=cutoff,hellinger=hellinger)$theta}
 obsmands <- mands(x,type="pln",v=v,cutoff=cutoff,hellinger=hellinger)
 bsmles <- matrix(0,nrow=m,ncol=np+1)
 for(i in 1:m){
#
# Sample from a Poisson Lognormal CDF at the MLE
#
  xsamp <- sample(x=obsmands$cdf[,"k"], 
                  size=length(x),
                  replace=TRUE,
		  prob=diff(c(0,obsmands$cdf[,"cdf"]))
		 )
  wmle <- aplnmle(x=xsamp,cutoff=cutoff,hellinger=hellinger)$theta
  bsmles[i,-np-1] <- wmle
  bsmles[i, np+1] <- mands(xsamp,type="pln",v=wmle,
                      cutoff=cutoff,hellinger=hellinger)$ads
# if(done){print(paste("done",i))}
 }
 if(hellinger){
  dimnames(bsmles) <- list(1:m,c("LN mean","LN s.d.","Pen. Hell. Dist."))
 }else{
  dimnames(bsmles) <- list(1:m,c("LN mean","LN s.d.","MANDS"))
 }
 #
 list(dist=obsmands$cdf,
  obsmle=v,
  bsmles=bsmles,
  quantiles=quantile(bsmles[,np+1],c(0.5*(1-alpha),0.5,0.5+0.5*alpha),na.rm=TRUE),
  pvalue=sum(obsmands$ads < bsmles[,np+1],na.rm=TRUE)/(sum(bsmles[,np+1]>0,na.rm=TRUE)+1),
  obsmands=obsmands$ads,
  meanmles=apply(bsmles,2,mean)
 )
}
#
# Calculate the Poisson Lognormal law MLE
#
aplnmle <-function(x,cutoff=1,cutabove=1000,guess=c(0.6,1.2),
                   method="BFGS", conc=FALSE, hellinger=FALSE, hessian=TRUE,
		   logn=TRUE){
 if(missing(guess) & hellinger){
  guess <- aplnmle(x=x,cutoff=cutoff,cutabove=cutabove,
   method=method, guess=guess,
   conc=FALSE,hellinger=FALSE)$theta
 }
  if(logn){
   guess[2] <- sqrt(log(1+(guess[2]*guess[2]-guess[1])/(guess[1]*guess[1])))
   guess[1] <- log(guess[1])-0.5*guess[2]*guess[2]
  }
 if(!logn && (guess[2]*guess[2]<=guess[1])){stop("The Poisson-log-normal variance must be greater than the mean")}
 if(logn && guess[2]<=0){stop("The Poisson-log-normal variance must be greater than zero.")}
 if(sum(x>=cutoff & x <= cutabove) > 0){
  aaa <- stats::optim(par=guess,fn=llpln,
#  lower=1.1,upper=30,
#  method="L-BFGS-B",
   method=method,
   hessian=hessian,control=list(fnscale=-10),
   x=x,cutoff=cutoff,cutabove=cutabove, hellinger=hellinger)
  aaanm <- stats::optim(par=guess,fn=llpln,
   hessian=hessian,control=list(fnscale=-10),
   x=x,cutoff=cutoff,cutabove=cutabove, hellinger=hellinger)
  if(aaanm$value > aaa$value){aaa<-aaanm}
  if(logn){
    aaa$par[1] <- exp(aaa$par[1]+0.5*aaa$par[2]*aaa$par[2])
    aaa$par[2] <- aaa$par[1]*sqrt(exp(aaa$par[2]*aaa$par[2])-1)
    aaa$par[2] <- sqrt(aaa$par[1]+aaa$par[2]*aaa$par[2])
  }
  names(aaa$par) <- c("Poisson Lognormal mean","Poisson Lognormal s.d.")
  if(is.psd(-aaa$hessian)){
   asycov <- -solve(aaa$hessian)
   dimnames(asycov) <- list(names(aaa$par),names(aaa$par))
   asyse <- sqrt(diag(asycov))
   asycor <- diag(1/asyse) %*% asycov %*% diag(1/asyse)
   dimnames(asycor) <- dimnames(asycov)
   names(asyse) <- names(aaa$par)
   ccc <- list(theta=aaa$par,asycov=asycov,se=asyse,asycor=asycor)
  }else{
   ccc <- list(theta=aaa$par)
  }
 }else{
  ccc <- list(theta=rep(NA,length=length(x)))
 }
#
 ccc$conc <- NA
 if(conc & exists("aaa")){
 v <- aaa$par
 probv <- v
 xr <- 1:10000
 xr <- xr[xr >= cutoff]
 if(cutoff>0){
  c0 <- 1-sum(dpln(v=probv,x=0:(cutoff-1)))
 }else{
  c0 <- 1
 }
 pdf <- dpln(v=probv,x=xr) / c0
 tx <- tabulate(x+1)/length(x)
 tr <- 0:max(x)
 names(tx) <- paste(tr)
 nc <- tx[tr<cutoff]
 nr <- tr[tr<cutoff]
 p0 <- sum(nc)
 p1 <- sum(nc * nr)
 p2 <- sum(nc * nr^2)
 conc<-(p1+sum(xr*pdf)*(1-p0))/(p2+sum(xr*xr*pdf)*(1-p0)-p1-sum(xr*pdf)*(1-p0))
 ccc$conc <- conc
 }
 ccc
}
#
# Complete data log-likelihoods
#
llplnall <- function(v,x,cutoff=1,cutabove=1000,np=2,logn=TRUE){
 x <- x[x<=cutabove]
 n <- length(x)
 tx <- tabulate(x+1)
 tr <- 0:max(x)
 names(tx) <- paste(tr)
 nc <- tx[tr<cutoff]
 aaa <- sum(nc*log(nc/n),na.rm=TRUE)+(n-sum(nc))*log((n-sum(nc))/n)+llpln(v=v,x=x,cutoff=cutoff,cutabove=cutabove,logn=logn)
 np <- np + cutoff
 aaa <- c(np,aaa,-2*aaa+np*2+2*np*(np+1)/(n-np-1),-2*aaa+np*log(n))
 names(aaa) <- c("np","log-lik","AICC","BIC")
 aaa
}
llpln <- function(v,x,cutoff=1,cutabove=1000,xr=1:10000,hellinger=FALSE,logn=TRUE){
 x <- x[x >= cutoff]
 x <- x[x <= cutabove]
 probv <- v
#probv[2] <- log(probv[2])
#if(probv[1]<=1.01 | probv[2]<=-1 | probv[1] > 50 | probv[2] > 100){
 if(!logn && (probv[1]<= 0.0 | probv[2]*probv[2] < probv[1])){
  out <- -10^10
 }else{
 if(logn && probv[2]<= 0.0){return(-10^10)}
 n <- length(x)
 if(cutoff>0){
  cprob <- 1 - sum(dpln(v=probv,x=0:(cutoff-1),logn=logn))
 }else{
  cprob <- 1
 }
#
 out <- -10^10
 if(hellinger){
  tr <- 0:max(x)
  xr <- tr[tr >= cutoff]
  pdf <- dpln(v=probv,x=xr,logn=logn)
# pdf <- pdf / cprob
  tx <- tabulate(x+1)/length(x)
  pc <- tx[tr>=cutoff]
  out <- -sum(((sqrt(pc) - sqrt(pdf))^2)[pc > 0])
  out <- out - 0.5 * (1-sum(pdf[pc>0])) # penalized Hellinger Distance
 }else{
  xv <- sort(unique(x))
  xp <- as.vector(table(x))
  out <- sum(xp*ldpln(v=probv,x=xv,logn=logn))-n*log(cprob)
 }
 if(is.infinite(out)){out <- -10^10}
 if(is.na(out)){out <- -10^10}
 }
 out
}
#
# Next routines to account for rounding
#
# First rounded Poisson Lognormal
#
llrpln <- function(v,x,cutoff=1,cutabove=1000,xr=1:10000,hellinger=FALSE){
 x <- x[x >= cutoff]
 x <- x[x <= cutabove]
#if(v[1] <= 2 | v[2]<= 0 | v[2] >= 1 ){
 if(v[2]< 0){
  out <- -10^6
 }else{
 probv <- v
#probv[2] <- (probv[1]-2)/probv[2] - (probv[1]-1)
 n <- length(x)
 if(cutoff>0){
  xc <- 0:(cutoff-1)
  aaa <- dpln(v=probv,x=xc)
  cprob <- 1 - sum(aaa)
 }else{
  cprob <- 1
 }
 if(cutabove<1000){
  xc <- 0:cutabove
  aaa <- dpln(v=probv,x=xc)
  cprob <- cprob - 1 + sum(aaa)
 }
#
 out <- -10^6
  tr <- 0:max(x)
# pdf <- dpln(v=probv,x=tr[-1])
  pdf <- dpln(v=probv,x=1:800)
  pdf[ 5] <- sum(dpln(v=probv,x= 4:  6))
  pdf[10] <- sum(dpln(v=probv,x= 7: 13))
  pdf[12] <- sum(dpln(v=probv,x= 8: 16))
  pdf[15] <- sum(dpln(v=probv,x=12: 18))
  pdf[20] <- sum(dpln(v=probv,x=16: 24))
  pdf[25] <- sum(dpln(v=probv,x=20: 29))
  pdf[30] <- sum(dpln(v=probv,x=24: 36))
  pdf[35] <- sum(dpln(v=probv,x=25: 44))
  pdf[40] <- sum(dpln(v=probv,x=30: 49))
  pdf[45] <- sum(dpln(v=probv,x=35: 54))
  pdf[50] <- sum(dpln(v=probv,x=35: 64))
  pdf[55] <- sum(dpln(v=probv,x=40: 69))
  pdf[60] <- sum(dpln(v=probv,x=45: 74))
  pdf[65] <- sum(dpln(v=probv,x=50: 79))
  pdf[70] <- sum(dpln(v=probv,x=55: 84))
  pdf[75] <- sum(dpln(v=probv,x=60: 89))
  pdf[80] <- sum(dpln(v=probv,x=65: 94))
  pdf[85] <- sum(dpln(v=probv,x=70: 99))
  pdf[90] <- sum(dpln(v=probv,x=75:104))
  pdf[95] <- sum(dpln(v=probv,x=80:109))
  pdf[100] <- sum(dpln(v=probv,x=75:124))
  pdf[120] <- sum(dpln(v=probv,x=95:144))
  pdf[130] <- sum(dpln(v=probv,x=105:154))
  pdf[150] <- sum(dpln(v=probv,x=115:184))
  pdf[200] <- sum(dpln(v=probv,x=150:249))
  pdf[300] <- sum(dpln(v=probv,x=250:349))
  pdf[560] <- sum(dpln(v=probv,x=500:639))
  pdf[800] <- sum(dpln(v=probv,x=500:1100))
  pc <- tabulate(x+1, nbins=length(pdf)+1)/n
  pl <- 1:length(pdf)
  pc  <-  pc[-1][pl >= cutoff & pl <= cutabove]
  pdf <- pdf[pl >= cutoff & pl <= cutabove]
  pc[is.na(pc)] <- 0
 if(hellinger){
  pc <- pc / sum(pc,na.rm=TRUE)
  out <- -sum(((sqrt(pc) - sqrt(pdf))^2)[pc > 0])
  out <- out - 0.5 * (1-sum(pdf[pc>0])) # penalized Hellinger Distance
 }else{
  out <- n*sum(pc*log(pdf))-n*log(cprob)
 }
 if(is.infinite(out)){out <- -10^6}
 if(is.na(out)){out <- -10^6}
 }
 out
}
#
# Complete data rounded log-likelihoods
#
llrplnall <- function(v,x,cutoff=1,cutabove=1000,np=2){
 x <- x[x<=cutabove]
 n <- length(x)
 tx <- tabulate(x+1)
 tr <- 0:max(x)
 names(tx) <- paste(tr)
 nc <- tx[tr<cutoff]
 nc <- nc[nc>0]
 aaa <- sum(nc*log(nc/n))+(n-sum(nc))*log((n-sum(nc))/n)+llrpln(v=v,x=x,cutoff=cutoff,cutabove=cutabove)
 np <- np + cutoff
 aaa <- c(np,aaa,-2*aaa+np*2+2*np*(np+1)/(n-np-1),-2*aaa+np*log(n))
 names(aaa) <- c("np","log-lik","AICC","BIC")
 aaa
}
#
# Calculate the rounded Poisson Lognormal law MLE
#
rplnmle <-function(x,cutoff=1,cutabove=1000,guess=c(0.6,1.2),
                   method="BFGS", conc=FALSE, hellinger=FALSE, hessian=TRUE){
 if(missing(guess) & hellinger){
  guess <- rplnmle(x=x,cutoff=cutoff,cutabove=cutabove,
   method=method, guess=guess,
   conc=FALSE,hellinger=FALSE)$theta
 }
 if(sum(x>=cutoff & x <= cutabove) > 0){
  aaa <- stats::optim(par=guess,fn=llrpln,
   method=method,
   hessian=hessian,control=list(fnscale=-10),
   x=x,cutoff=cutoff,cutabove=cutabove, hellinger=hellinger)
  aaanm <- stats::optim(par=guess,fn=llrpln,
   hessian=hessian,control=list(fnscale=-10),
   x=x,cutoff=cutoff,cutabove=cutabove, hellinger=hellinger)
  if(aaanm$value > aaa$value){aaa<-aaanm}
  names(aaa$par) <- c("Poisson Lognormal mean","Poisson Lognormal s.d.")
  if(is.psd(-aaa$hessian)){
   asycov <- -solve(aaa$hessian)
   dimnames(asycov) <- list(names(aaa$par),names(aaa$par))
   asyse <- sqrt(diag(asycov))
   asycor <- diag(1/asyse) %*% asycov %*% diag(1/asyse)
   dimnames(asycor) <- dimnames(asycov)
   names(asyse) <- names(aaa$par)
   ccc <- list(theta=aaa$par,asycov=asycov,se=asyse,asycor=asycor)
  }else{
   ccc <- list(theta=aaa$par)
  }
 }else{
  ccc <- list(theta=rep(NA,length=length(x)))
 }
#
 ccc$conc <- NA
 if(conc & exists("aaa")){
 v <- aaa$par
 probv <- v
#probv[2] <- (probv[1]-2)/probv[2] - (probv[1]-1)
 xr <- 1:10000
 xr <- xr[xr >= cutoff]
 if(cutoff>0){
  c0 <- 1-sum(dpln(v=probv,x=0:(cutoff-1)))
 }else{
  c0 <- 1
 }
 pdf <- dpln(v=probv,x=xr) / c0
 tx <- tabulate(x+1)/length(x)
 tr <- 0:max(x)
 names(tx) <- paste(tr)
 nc <- tx[tr<cutoff]
 nr <- tr[tr<cutoff]
 p0 <- sum(nc)
 p1 <- sum(nc * nr)
 p2 <- sum(nc * nr^2)
 conc<-(p1+sum(xr*pdf)*(1-p0))/(p2+sum(xr*xr*pdf)*(1-p0)-p1-sum(xr*pdf)*(1-p0))
 ccc$conc <- conc
 }
 ccc
}
#
# Bootstrap CI for rounded Poisson Lognormal
#
bootstraprpln <- function(x,cutoff=1,cutabove=1000,
                          m=200,alpha=0.95,guess=c(0.6,1.2),hellinger=FALSE,
                          mle.meth="rplnmle"){
#if(mle.meth!="rplnmlef"){
 aaa <- rplnmle(x=x,cutoff=cutoff,cutabove=cutabove,guess=guess,hellinger=hellinger)$theta
#}else{
# aaa <- rplnmlef(x=x,cutoff=cutoff,cutabove=cutabove,guess=guess,hellinger=hellinger)$theta
#}
bmles <- matrix(0,nrow=m,ncol=2)
for(i in seq(along=bmles[,1])){
 xsamp <- sample(x,size=length(x),replace=TRUE)
# if(mle.meth!="rplnmlef"){
  bmles[i,] <- rplnmle(x=xsamp,cutoff=cutoff,cutabove=cutabove,guess=guess,hellinger=hellinger)$theta
# }else{
#  bmles[i,] <- rplnmlef(x=xsamp,cutoff=cutoff,cutabove=cutabove,guess=guess,hellinger=hellinger)$theta
# }
}
#
rbind(
c(quantile(bmles[,1],c(0.5*(1-alpha),0.5,0.5+0.5*alpha),na.rm=TRUE),aaa),
c(quantile(bmles[,2],c(0.5*(1-alpha),0.5,0.5+0.5*alpha),na.rm=TRUE),aaa)
)
}
#
# Compute the Poisson-Lognormal PMF
#
dpln1 <- function(v,x,cutoff=0,points=25,approxlim=10){
  quad <- gauss.hermite(points,iterlim=1000)
  y <- x
  xr <- x[x<=approxlim]
  gha <- exp(-exp(outer(quad[,1]*v[2]+v[1],xr*v[2]*v[2],"+")))
  y[x<=approxlim] <- exp(xr*v[1]+0.5*xr*xr*v[2]*v[2])*
                  ( quad[,2] %*% gha )/gamma(xr+1)
  xr <- x[x>approxlim]
  xrs <- (log(xr)-v[1])/v[2]
  xrb <- xrs*xrs + log(xr) - v[1] - 1
  y[x>approxlim] <- exp(-0.5*xrs*xrs)*(1+xrb/(2*xr*v[2]*v[2]))/
                                   (sqrt(2*pi)*v[2]*xr)
  if(cutoff>0){
   c0 <- 1-sum(dpln(v=v,x=0:(cutoff-1)))
   y <- y / c0
  }
  y
}
ldpln1 <- function(v,x,cutoff=0){
 log(dpln(v,x,cutoff=cutoff))
}
dpln.refined <- function (v, x, cutoff = 0, points = 25, approxlim = 10) 
{
    quad <- gauss.hermite(points, iterlim = 1000)
    y <- x
    high <- x > approxlim
    if(any(!high)){
     xr <- x[!high]
     yr <- y[!high]
     for(i in seq(along=xr)){
      xri <- xr[i]
      gha <- exp(-exp(quad[,1]*v[2]+v[1]+xri*v[2]*v[2]))
      yr[i] <- exp(xri*v[1]+xri*xri*v[2]*v[2]*0.5)*sum(gha*quad[,2])/gamma(xri+1)
     }
     y[!high] <- yr
    }
    if(any(high)){
     xr <- x[high]
     xrs <- (log(xr) - v[1])/v[2]
     xrb <- xrs * xrs + log(xr) - v[1] - 1
     aaa <- log(1+xrb/(2*xr*v[2]*v[2]))
     y[high] <- exp(-0.5*xrs*xrs + aaa - log(sqrt(2*pi)*v[2]*xr))
    }
    if (cutoff > 0) {
      c0 <- 1 - sum(dpln(v = v, x = rep(0:(cutoff - 1),5)))/5
      y <- y / c0
    }
    y
}
#prpoi <- function(r,mu,sigma,points=10){
#  quad <- gauss.hermite(points)
#  gha <- exp(-exp(quad[,1]*sigma+mu+r*sigma*sigma))
#  (r*mu+r*r*sig2*0.5)*sum(gha*quad[,2])/gamma(r+1)
#}
ldpln <- function(v,x,cutoff=0,logn=TRUE){
 log(dpln(v,x,cutoff=cutoff,logn=logn))
}
dpln <- function(v,x,cutoff=0,logn=TRUE){
  if(!logn){
   # Convert v from observed to log-normal parameters
   if(v[1]<=0 | v[2]<=0){return(x-x)}
   if(v[2]*v[2]>v[1]){
    sigma0 <- sqrt(log(1+(v[2]*v[2]-v[1])/(v[1]*v[1])))
    mu0 <- log(v[1])-0.5*sigma0*sigma0
   }else{
    sigma0 <- 0.0000001
    mu0 <- log(v[1])
   }
  }else{
    sigma0 <- v[2]
    mu0 <- v[1]
  }
# void dpln (int *x, double *mu, double *sig, int *n, double *val);
  y <- .C("dpln",
          x=as.integer(x),
          mu=as.double(mu0),
          sig=as.double(sigma0*sigma0),
          n=as.integer(length(x)),
          val=double(length(x)),
          PACKAGE="degreenet")$val
  if (cutoff > 0) {
    c0 <- 1 - sum(dpln(v = v, x = (0:(cutoff - 1)), cutoff=0))
    y <- y/c0
  }
  return(y)
}
#
# These are the basic simulation routines
#
simpln <- function(n=100, v=c(0.6,1.2), maxdeg=10000, cutoff=1){
  sample(x=cutoff:maxdeg, size=n, replace=TRUE, 
         prob=dpln(v=v,x=cutoff:maxdeg,cutoff=cutoff))
}

Try the degreenet package in your browser

Any scripts or data that you put into this service are public.

degreenet documentation built on June 22, 2024, 11:20 a.m.