R/Mle_and_simulation_generic_functions.R

Defines functions generate.observations sample.loglikelihood.plugin sample.loglikelihood full.loglikelihood pop.loglikelihood loglikethetaxi deriveloglikethetaxi deriveloglikethetaxi2 rhoderiveloglikethetaxi2 Deriveloglikethetaxi2 RhoDeriveloglikethetaxi2 Imatrixf Imatrix1 Imatrix2 Imatrix3 Imatrix4 Imatrix6 Imatrix7 Imatrix9 calcule.Sigma calculeV cav fullMLE sampleMLE pseudoMLE simule Simulation_data simulation.summary

Documented in Simulation_data simulation.summary

#####################################################################
##  5.1 Maximum Likelihood Estimation generic functions
#####################################################################
#    Entries:
#      - m : a model, i.e. a list that contains at least
#              - the rhothetaxi function, that is a function of a 
#                vecteur y, and parameters theta and xi
#              - a density function dloitheta
#      - y : a vecteur of obsercations
#      - theta
#      - xi
#      - thetaxi: concatenation of theta and xi
###################################################################
##generate observations
generate.observations<-function(model){
  Y<-model$rloiy();  #Y generation
  Z<-model$rloiz(Y); #Z generation
  S<-model$Scheme$S(Z);     #sample selection
  Pik<-model$Scheme$Pik(Z)
  list(y=as.matrix(Y)[S,],z=as.matrix(Z)[S,],pik=Pik[S])}
## 1. Computations related to loglikelihood
##1.1 Calculus of the plugin sample log likelihood function
sample.loglikelihood.plugin<-function(theta,y,model,xi){sum(log(model$rhothetaxi(y,theta,xi))+log(model$dloitheta(y,theta)))}
##1.2 Calculus of the  sample log likelihood function
sample.loglikelihood<-function(thetaxi,obs,model){model$sampleloglikelihood(Obs,theta=thetaxi[1:length(model$theta)],xi=thetaxi[length(model$theta)+(1:length(model$xi))])}
full.loglikelihood<-function(thetaxi,obs,model){model$fullloglikelihood(Obs,theta=thetaxi[1:length(model$theta)],xi=thetaxi[length(model$theta)+(1:length(model$xi))])}
##1.3. Calculus of the mean population log likelihood
pop.loglikelihood<-function(theta,y,model,xi){return(sum(log(model$dloitheta(y,theta))))}
##1.4. Calculus of the derivative of the loglikelihood for one observation
loglikethetaxi <- function(thetaxi,model,y){
  log(model$rhothetaxi(y,thetaxi[1:length(model$theta)],thetaxi[length(model$theta)+1:length(model$xi)])*model$dloitheta(y,thetaxi[1:length(model$theta)]))}
deriveloglikethetaxi  <- function(y,model,theta,xi){
  numDeriv::jacobian(c(theta,xi),func=loglikethetaxi,model=model,y=y)}
##1.5 Second order derivative for one observation
deriveloglikethetaxi2 <- function(y,model,theta,xi){numDeriv::hessian (c(theta,xi),func=loglikethetaxi,model=model,y=y)}
##1.5 Second order derivative for one observation          
rhoderiveloglikethetaxi2<-function(y,model,theta,xi){
  model$rhothetaxi(y,theta,xi)*deriveloglikethetaxi2(y,model,theta,xi)}
##1.6 Version of previous functions for many observations  
Deriveloglikethetaxi <- deriveloglikethetaxi
#function(y,model,theta,xi){
# if(is.matrix(y)){return(t(apply(y,MARGIN=1,deriveloglikethetaxi,model=model,theta=theta,xi=xi)))} 
# if(is.vector(y)){return(deriveloglikethetaxi(y,model,theta,xi))}}

Deriveloglikethetaxi2 <-function(y,model,theta,xi){
  if(is.vector(y)){return(deriveloglikethetaxi2(y,model,theta,xi))} 
  if(is.matrix(y)){return(plyr::aaply(y,1,deriveloglikethetaxi2,model=model,theta=theta,xi=xi))}}

RhoDeriveloglikethetaxi2<-function(y,model,theta,xi){
  if(is.vector(y)){return(deriveloglikethetaxi2(y,model=model,theta,xi))} 
  if(is.matrix(y)){return(plyr::aaply(y,1,rhoderiveloglikethetaxi2,model=model,theta=theta,xi=xi))}}   
## Computation of information matrices

Imatrixf<-function(model,nbrepI=300,method=NULL){
  if(is.null(method)){method<-"FirstDeriv"}
  if(method=="FirstDeriv"){Imatrix7(model,nbrepI)}else{
    if(method=="formula"&!is.null(model$cave$Imatrix)){model$cave$Imatrix}}}

Imatrix1<-function(model,nbrepI=300){#3 minutes for n=30
  xx<-plyr::raply(nbrepI,
                  (function(){
                    y<-model$rloiy()
                    dd<-deriveloglikethetaxi(y,model,model$theta,model$xi)
                    rhorho=model$rhothetaxi(y,model$theta,model$xi)
                    return(t(dd)%*%(dd*rhorho)/model$conditionalto$N)})())
  return(apply(xx,2:length(dim(xx)),mean))}
#compute the information matrix in different ways
Imatrix2<-function(model,nbrepI=300){
  y<-do.call(rbind,plyr::rlply(nbrepI,as.matrix(model$rloiy())))
  xxx=RhoDeriveloglikethetaxi2(y,model,theta,xi)
  return( -apply(xxx,2:length(dim(xxx)),mean))}

Imatrix3<-function(model,nbrepI=300){
  y<-do.call(rbind,plyr::rlply(nbrepI,as.matrix(model$rloiy())))
  dd<-deriveloglikethetaxi(y,model,model$theta,model$xi)
  rhorho=model$rhothetaxi(y,model$theta,model$xi)
  return(t(dd)%*%(dd*rhorho)/(nbrepI*N))}

Imatrix4<-function(model,nbrepI=300){
  y<-do.call(rbind,plyr::rlply(nbrepI,as.matrix(model$rloiy())))
  s<-model$Scheme$S(model$rloiz(y)) #draw a sample
  return( -apply(Deriveloglikethetaxi2(as.matrix(y)[s,],model,theta,xi),c(1,2),mean))}

Imatrix6<-function(model,nbrepI=300){
  y<-do.call(rbind,plyr::rlply(nbrepI,as.matrix(model$rloiy())))
  s<-model$Scheme$S(model$rloiz(y))
  nrep=length(s)
  dd<-deriveloglikethetaxi(y[s,],model,theta,xi)
  return(t(dd)%*%(dd)/nrep)}

Imatrix7<-function(model,nbrepI=300){
  y<-do.call(rbind,plyr::rlply(nbrepI,as.matrix(model$rloiy())))
  s<-model$Scheme$S(model$rloiz(y))
  dd<-deriveloglikethetaxi(as.matrix(y)[s,],model,model$theta,model$xi)
  return(t(dd)%*%(dd)/nrow(dd))}


Imatrix9<-function(model,nbrepI=300){
  y<-do.call(rbind,plyr::rlply(nbrepI,model$rloiy()))
  dd<-Deriveloglikethetaxi(y,model,model$theta,model$xi)
  rhorho <- model$rhothetaxi(y,model$theta,model$xi)*model$rhoxthetaxi(y,model$theta,model$xi)
  return(t(dd)%*%(dd*rhorho)/(model$conditionalto$N*nbrepI))}




if(FALSE){
  N<-5000;nbrepI=300
  compare<-sapply(sapply(paste("Imatrix",c(1:7,9),sep=""),get),function(x){
    try(timex<-system.time(I<- x(N,model,nbrepI,x=x)))
    list(time=timex,I=I)    
  })
  system.time(I1 <- Imatrix1(N,model,nbrepI,x=x));#user 10.47 s
  system.time(I2 <- Imatrix2(N,model,nbrepI=300,x=x));#user :3.804
  system.time(I3 <- Imatrix3(N,model,nbrepI,x=x));#user 
  system.time(I4 <- Imatrix4(N,model,nbrepI=300,x=x));#user
  system.time(I5 <- Imatrix5(N,model,nbrepI,x=x))#user 
  system.time(I6 <- Imatrix6(N,model,nbrepI,x=x));#user 0.012
  system.time(I7 <- Imatrix7(N,model,nbrepI=300,x=x));#user 0.012
  system.time(I9 <- Imatrix9(N,model,nbrepI=300,x=x));#user 0.052
  lapply(paste("I",c(1:3,6,7,9),sep=""),get);
}


calcule.Sigma<-function(model,nbrepSigma=1000,method=list(I="MC",Sigma="MC")){
  suppressMessages(attach(model$conditionalto))
  return(N *model$tau*
           var(plyr::aaply(
             plyr::raply(nbrepSigma,
                         function(){
                           y=model$rloiy()   #generates y conditionnally to x
                           z=model$rloiz(y)       #generates z conditionnally to x and y
                           s <- model$Scheme$S(z);#draws the sample
                           pi <- model$Scheme$Pik(z);# compute the inclusion probabilities            
                           return(apply(cbind(Deriveloglikethetaxi(as.matrix(y)[s,],model,model$theta,model$xi),
                                              model$xihatfunc1(as.matrix(y)[s,],as.matrix(z)[s,],pi[s])),2,mean))}),
             1,function(u){c(u[1:length(model$theta)],
                             model$xihatfunc2(u[length(model$theta)+length(model$xi)+(1:model$xihatfuncdim)]))})))}

calculeV<-function(Sigma,Im,dimtheta){  
  dimxi<-length(Sigma[1,])-dimtheta
  Sigma11<-Sigma[1:dimtheta,1:dimtheta];
  Sigma22<-Sigma[dimtheta+dimxi,dimtheta+dimxi];
  Sigma12<-Sigma[1:dimtheta,dimtheta+dimxi]
  I11<-Im[1:dimtheta,1:dimtheta];
  I12<- as.matrix(Im[1:dimtheta,dimtheta+dimxi])
  
  V <-   solve(I11)%*%(Sigma11+I12%*%Sigma22%*%t(I12)-I12%*%t(Sigma12)-Sigma12%*%t(I12))%*%solve(I11)
  V1 <-  solve(I11)%*%(Sigma11)%*%solve(I11)
  V2 <-  solve(I11)%*%(I12%*%Sigma22%*%t(I12))%*%solve(I11)
  V3 <-  solve(I11)%*%(-I12%*%t(Sigma12)-Sigma12%*%t(I12))%*%solve(I11)
  return(list(V=V,V1=V1,V2=V2,V3=V3))}


cav<-function(model,nbrepSigma=300,nbrepI=300,method=list(Sigma="MC")){
  method<-list(Sigma=if(is.null(method$Sigma)){if(!is.null(model$Sigma)){"formula"}else{"MC"}}else{method$Sigma},
               I=if(is.null(method$Sigma)){if(!is.null(model$I)){"formula"}else{"FirstDeriv"}}else{method$I})
  suppressMessages(attach(model$conditionalto))
  Sigma <- calcule.Sigma(model,nbrepSigma,method$Sigma)
  Imatrix <- Imatrixf(model,method=method$I)
  V123<-calculeV(Sigma,Imatrix,length(model$theta))
  V<-list(Sample=V123$V/(N*model$tau),Pseudo=NA,Naive=NA,Full=NA)
  return(list(Sigma=Sigma,Im=Imatrix,V=V,
              V1=V123$V1/(N*model$tau),V2=V123$V2/(N*model$tau),V3=V123$V3/(N*model$tau)))}


##5. Optimisation procedure : computation of the maximum likelihood estimator
fullMLE<-function(Obs,model,method=NULL){
  if(is.null(method)){method="nlm"}
  ys<-as.matrix(Obs$y)
  if(method=="formula"){model$fullMLE(Obs)}else{
    if(!is.null(model$fulllikelihood)){optimx::optimx(c(model$theta,model$xi),
                                                      fn =full.loglikelihood,control=list(maximize=TRUE,method=method),model=model,Obs=Obs)}else{NA}}}
sampleMLE<-function(Obs,model,method=NULL,xi.hat=NULL){
  if(is.null(method)){method="nlm"}
  if(method=="formula"){model$sampleMLE(Obs)}else{
    if(is.null(xi.hat)){xi.hat<-model$xihat(Obs)}
    unlist(optimx::optimx(model$theta,
                          fn=sample.loglikelihood.plugin,method=method,
                          control=list(maximize=TRUE),
                          y=Obs$y,model=model,xi=xi.hat))[1:length(model$theta)]}}
pseudoMLE<-function(Obs,model,method=NULL){
  if(is.null(method)){method="nlm"}
  if(method=="formula"){model$pseudoMLE(Obs)}else{
    unlist(optimx::optimx(model$theta,
                          fn=pseudo.loglikelihood,method=method,
                          control=list(maximize=TRUE),
                          y=Obs$y,model=model))}}

#6. Simulation procedure
# Entry :

#  - model: a population model, i.e. a list that contains at least:
#      - a function rloiy to generate the population study variable
#      - a function rloiz that generates the population design variable
#      - Scheme a list that contains
#        - a function Pik
#        - a Scheme function S that given a vector of design variables
#          returns a random sample
#  - N a population size
#  - nbreps : number of replicates
#  - method : name of the method for optimisation
#    ("grille", "Grille It", 

simule<-function(model,
                 nbreps=300,
                 method=NULL){
  method=list(Sample=if(is.null(method$Sample)){"nlm"}else{method$Sample},
              Pseudo=if(is.null(method$Pseudo)){if(is.null(model$thetaht))   {"nlm"}else{"formula"}}else{method$Pseudo},
              Naive =if(is.null(method$Naive )){if(is.null(model$thetaniais)){"nlm"}else{"formula"}}else{method$Naive },
              Full  =if(is.null(method$Full  )){if(is.null(model$thetafull)) {"nlm"}else{"formula"}}else{method$Full  })
  #Set the precision (used in optimisation procedure)
    suppressMessages(attach(model))
    suppressMessages(attach(Scheme))
    Estim <- plyr::rlply(nbreps,
                       (function(){
                         #Population generation and sample selection
                         Obs<-generate.observations(model)
                         thetaxi.full=fullMLE(Obs,model,method$Full)
                         xi.hat   =xihat     (Obs)
                         try(capture.output(Sample<-sampleMLE(Obs,model,method$Sample,xi.hat=xi.hat)))
                         return(list(xi.hat   =xi.hat,
                                     Pseudo =thetaht(Obs),
                                     Naive=thetaniais(Obs),
                                     Sample=Sample,
                                     xi.full=thetaxi.full[length(model$theta)+(1:length(model$xi))],
                                     Full=thetaxi.full[1:length(model$theta)]))})(),
                       .progress = "text")
  noms<-names(Estim[[1]])
  Estim<-Estim[sapply(Estim,function(l){!is.character(l$Sample)})]
  Estim<-lapply(as.list(noms),function(nom){plyr::laply(Estim,function(ll){ll[[nom]]})})
  names(Estim)<-noms
    suppressMessages(attach(Estim))
  Var<-lapply(Estim,var,na.rm=TRUE)
  M=lapply(Estim,function(est){apply(as.matrix(est),2,mean,na.rm=TRUE)})
  E=list(model$xi,model$theta,model$theta,model$theta,model$xi,model$theta)
  names(E)<-names(M)
  Bias=lapply(as.list(noms),function(x){M[[x]]-E[[x]]})
  names(Bias)<-names(M)
  MSE=lapply(as.list(noms),function(x){Var[[x]]+Bias[[x]]%*%t(Bias[[x]])})
  names(MSE)<-names(M)
  return(list(
    model=model,
    Estim=Estim,
    Variance=Var,
    Mean=M,
    Bias=Bias,
    E=E,
    "M.S.E."=MSE))}

#7. Simulations and output
Simulation_data<-function(popmodelfunction,theta,xi,conditionalto,
                          method=NULL,nbreps=3000,nbrepI=3000,nbrepSigma=1000){
  model<-popmodelfunction(theta,xi,conditionalto)
  message("1. compute the information matrix, Sigma, and the approximated variance of sample MLE" )
  cave <- cav(model,nbrepSigma=nbrepSigma,nbrepI=nbrepI,method)
  message(paste0("2. replicates ",nbreps," times: generates the study variables, generates the design variables conditional to study variables, draw sample, compute all estimates of theta"))
  suppressWarnings(sim<-simule(model,nbreps=nbreps,method))
  return(list(model=model,xi=xi,sim=sim,cave=cave))}

simulation.summary<-function(table_data){
    ll<-c(table_data$sim[c("Mean","Bias","Variance","M.S.E.","E")],table_data$cave["V"])
    X<-do.call(rbind,
               lapply(c("Naive","Pseudo","Sample","Full"),function(est){
                 do.call(data.frame,c(list(Estimator=est),
                                      list("theta"=as.array(list(table_data$model$theta))),
                                      list("xi"=as.array(list(table_data$model$xi))),
                                      list("Contidional to"=as.array(list(table_data$model$conditionalto))),
                                      list("Mean"=as.array(list(signif(ll$Mean[est][[1]],3)))),
                                      list("% Relative Bias"=as.array(list(signif(100*ll$Bias[est][[1]]/ll$E[est][[1]],3)))),
                                      list("RMSE Ratio"=as.array(list(signif(diag(as.matrix(ll$"M.S.E."[est][[1]]))/diag(as.matrix(ll$"M.S.E"["Sample"][[1]],3)))))),
                                      list("Empirical Variance"=as.array(list(signif(diag(as.matrix(ll$Variance[est][[1]],3)))))),
                                      list("Asymptotic Variance"=as.array(list(signif(diag(as.matrix(ll$V[est][[1]],3))))))))}))
    names(X)<-c("Estimator",
                "$\\theta$","$\\xi$",paste0("Conditional to (", paste(names(unlist(table_data$model$conditionalto)),collapse=","),")"),"Mean","% Relative Bias","RMSE Ratio","Empirical Variance","Asymptotic Variance")
    X}
DanielBonnery/pubBonneryBreidtCoquet2016 documentation built on May 6, 2019, 1:35 p.m.