R/gbmt.R

Defines functions posterior plot.gbmt t_col areColors predict.gbmt BIC.gbmt AIC.gbmt extractAIC.gbmt logLik.gbmt fitted.gbmt residuals.gbmt coef.gbmt summary.gbmt print.gbmt gbmt gbmtFit wlsFit pldBound pruneFun icCalc nparCalc appaCalc imputFun smoothFun logdmN scaleFun_inv scaleFun

Documented in gbmt plot.gbmt posterior predict.gbmt

# rescaling (auxiliary)
scaleFun <- function(x, scaling, par) {
  mu <- par[1]
  sig <- par[2]
  if(scaling==1) {
    # centering
    x-mu
    } else if(scaling==2) {
    # standardization
    (x-mu)/sig  
    } else if(scaling==3) {
    # division by the mean
    x/mu
    } else if(scaling==4) {
    # log centering
    log(x)-log(mu)
    } else {
    x  
    }
  }

# inverse rescaling (auxiliary)
scaleFun_inv <- function(x, scaling, par) {
  if(scaling==1) {
    # centering
    x+par[1]
    } else if(scaling==2) {
    # standardization
    x*par[2]+par[1]
    } else if(scaling==3) {
    # division by the mean
    x*par[1]
    } else if(scaling==4) {
    # log division by the mean
    exp(x)*par[1]
    } else {
    x  
    }
  }

# multinormal density (auxiliary)
logdmN <- function(x,mu,S) {
  k <- ncol(S)
  dS <- det(S)
  if(dS<=0) ldS <- -7.451e+2 else ldS <- log(dS)
  cost <- -k/2*log(2*pi)-0.5*ldS
  W <- solve(S)
  ll <- c()
  for(i in 1:nrow(x)){
    idx <- as.vector(t(x[i,]-mu[i,]))
    ll[i] <- cost-0.5*t(idx)%*%W%*%(idx)
    }
  ll
  }

# smooth covariance matrix (auxiliary)
smoothFun <- function(S) {
  tt <- tryCatch(solve(S), error=function(e){e})
  if(is(tt,"error")) {
    #print("smoothed")
    as.matrix(nearPD(S)$mat)
    } else {
    S
    }
  }

# function for imputation (auxiliary)
imputFun <- function(xdat, tval, beta, S, d) {
  newdat <- xdat
  for(i in 1:nrow(xdat)) {
    iNA <- which(is.na(xdat[i,]))
    if(length(iNA)>0) {
      ix <- xdat[i,]
      it <- c(1,tval[i]^(1:d))
      imu <- t(beta)%*%it
      iO <- which(!is.na(xdat[i,]))
      if(length(iO)>0) {
        s22 <- S[iO,iO,drop=F]
        s22_inv <- solve(s22)
        ijdiff <- as.vector(t(ix[iO]-imu[iO]))
        newdat[i,iNA] <- imu[iNA]+S[iNA,iO,drop=F]%*%s22_inv%*%ijdiff
        } else {
        newdat[i,iNA] <- imu[iNA]
        }
      }
    }
  newdat
  }

# compute appa (auxiliary)
appaCalc <- function(tab) {
  cl <- apply(tab,1,which.max)
  appa <- c()
  for(i in 1:ncol(tab)) {
    appa[i] <- mean(tab[which(cl==i),i])
    }
  names(appa) <- colnames(tab)
  appa
  }

# compute number of parameters (auxiliary)
nparCalc <- function(beta) {
  ng <- length(beta)
  np <- ncol(beta[[1]])
  sum(sapply(beta,function(z){sum(z!=0)}))+ng*(1+np*(np+1)/2)-1
  }

# compute information criteria (auxiliary)
icCalc <- function(logLik, npar, ss) {
  a <- -2*logLik
  c(aic=a+2*npar,
    bic=a+npar*log(ss),
    #aicc=a+2*npar*(1+(npar+1)/(ss-npar-1)),
    caic=a+npar*(log(ss)+1),
    ssbic=a+npar*log((ss+2)/24),
    hqic=a+npar*2*log(log(ss)))
  }

# pruning (auxiliary)
pruneFun <- function(beta, posterior, data) {
  ng <- length(beta)
  nomi <- colnames(beta[[1]])
  pnam <- rownames(beta[[1]])
  d <- nrow(beta[[1]])-1
  cou <- rownames(posterior)
  t <- as.numeric(data[,2])-min(as.numeric(data[,2]))+1
  weiMat <- matrix(nrow=nrow(data),ncol=ng)
  for(i in 1:length(cou)) {
    ind <- which(data[,1]==cou[i])
    for(j in 1:ng) {
      weiMat[ind,j] <- posterior[i,j]
      }
    }
  reg <- beta <- S <- vector("list",length=ng)
  postd <- matrix(nrow=nrow(posterior),ncol=ng)
  names(reg) <- names(beta) <- names(S) <- 1:ng
  #
  modSel <- function(xname, wei) {
    form <- formula(paste0(xname,"~",paste("I(t^",1:d,")",collapse="+")))
    mod <- lm(form, data=data, weights=wei)
    deg <- d
    fine <- 0
    #
    if(sum(wei!=0)==0) fine <- 1  ## <-- gestione gruppi vuoti
    #
    while(fine==0) {
      if(deg<=0) {
        fine <- 1
        } else {
        pval <- suppressWarnings(summary(mod)$coef)[,4]
        p_last <- pval[length(pval)]
        if(is.na(p_last) || p_last>0.05) {
          deg <- deg-1
          if(deg>=1) {
            form <- formula(paste0(xname,"~",paste("I(t^",1:deg,")",collapse="+")))
            } else {
            form <- formula(paste0(xname,"~1"))
            }
          mod <- lm(form, data=data, weights=wei)
          } else {
          fine <- 1  
          }
        }
      }
    mod$call$formula <- form
    mod$call$weights <- mod$call$data <- NULL
    mod
    }
  #
  for(i in 1:ng) {
    ireg <- list()
    ibhat <- matrix(0,nrow=d+1,ncol=length(nomi))
    rownames(ibhat) <- pnam
    colnames(ibhat) <- nomi
    for(j in 1:length(nomi)) {
      ijmod <- modSel(xname=nomi[j], wei=weiMat[,i])
      ireg[[j]] <- ijmod
      ijb <- ijmod$coefficients
      ibhat[1:length(ijb),j] <- ijb
      }
    names(ireg) <- nomi
    beta[[i]] <- ibhat
    ires <- do.call(cbind,lapply(ireg,function(x){x$residuals}))
    ifit <- do.call(cbind,lapply(ireg,function(x){x$fitted.values}))
    iS <- matrix(0,nrow=length(nomi),ncol=length(nomi))
    rownames(iS) <- colnames(iS) <- nomi
    iswei <- sum(weiMat[,i])
    for(j in 1:nrow(ires)) {
      ijr <- as.vector(t(ires[j,]))
      iS <- iS+weiMat[j,i]/iswei*(ijr%*%t(ijr))
      }
    S[[i]] <- smoothFun(iS)
    reg[[i]] <- ireg
    for(w in 1:length(cou)) {
      auxw <- which(data[,1]==cou[w])
      if(length(nomi)>1) {
        wdat <- as.matrix(data[auxw,nomi,drop=F])
        wfit <- ifit[auxw,nomi,drop=F]
        ijD <- sum(logdmN(wdat, wfit, S[[i]]))
        } else {
        ijD <- sum(dnorm(data[auxw,nomi], ifit[auxw], sqrt(S[[i]]),log=T))
        }
      postd[w,i] <- pldBound(ijD+log(mean(posterior[,i])))
      }
    }
  ll <- sum(log(apply(exp(postd),1,sum)))
  list(beta=beta,S=S,reg=reg,logLik=ll)
  }

# bound posterior log density (auxiliary)
pldBound <- function(x) {
  max(min(x,709.782),-7.451e+2)
  }

# fit wls (auxiliary)
wlsFit <- function(x.names,data,t,weights=NULL,d=1) {
  modList <- list()
  beta <- matrix(nrow=d+1,ncol=length(x.names))
  res <- cbind()
  for(i in 1:length(x.names)) {
    iform <- formula(paste0(x.names[i],"~",paste("I(t^",1:d,")",collapse="+")))
    imod <- lm(iform, data=data, weights=weights)
    imod$call$formula <- iform
    imod$call$data <- NULL
    #deparse(substitute(data))
    modList[[i]] <- imod
    beta[,i] <- imod$coefficients
    res <- cbind(res, imod$residuals)
    }
  names(modList) <- colnames(beta) <- colnames(res) <- x.names
  rownames(beta) <- c("(Intercept)",paste0("I(t^",1:d,")"))
  if(is.null(weights)) {
    S <- cov(res)
    } else {
    S <- matrix(0,nrow=length(x.names),ncol=length(x.names))
    rownames(S) <- colnames(S) <- x.names
    swei <- sum(weights)
    for(i in 1:nrow(res)) {
      ir <- as.vector(t(res[i,]))
      S <- S+weights[i]/swei*(ir%*%t(ir))
      }
    }
  list(fit=modList,beta=beta,S=smoothFun(S))
  }

# run em algorithm (auxiliary)
gbmtFit <- function(x.names,unit,time,ng,d,data,pruning,tol,maxit,init,quiet,id_restart,nch0) {
  data[,unit] <- factor(data[,unit])
  tnam <- levels(factor(data[,time]))
  tval <- as.numeric(data[,time]-min(data[,time])+1)
  ismiss <- sum(is.na(data[,x.names]))>0
  cou <- levels(factor(data[,unit]))
  n <- length(cou)
  np <- length(x.names)
  nt <- length(tnam)
  if(!is.null(id_restart)) {
    mstr0 <- paste0("Restart ",id_restart," - ")
    } else {
    mstr0 <- ""
    }
  # create objects
  if(is.null(init)) {
    Z <- Z0 <- sample(rep(1:ng, length=n))
    } else {
    Z <- Z0 <- init
    }
  names(Z) <- names(Z0) <- cou
  Xmat <- matrix(nrow=nt,ncol=d+1)
  for(i in 0:d) Xmat[,i+1] <- sort(unique(tval))^i
  rownames(Xmat) <- tnam
  colnames(Xmat) <- paste0("I(t^",0:d,")")
  postd <- postp <- matrix(nrow=n,ncol=ng)
  rownames(postd) <- rownames(postp) <- cou
  p0 <- prop.table(table(factor(Z,levels=1:ng)))
  beta <- S <- reg <- vector("list",length=ng)
  dataI <- data
  if(ismiss) {
    for(j in 1:length(x.names)) {
      ijna <- which(is.na(data[,x.names[j]]))
      if(length(ijna)>0) dataI[ijna,x.names[j]] <- mean(data[,x.names[j]],na.rm=T)
      }
    dataI_list <- vector("list",length=ng)
    }
  form <- formula(paste("cbind(",paste(x.names,collapse=","),") ~ ",paste("I(t^",1:d,")",collapse="+"),sep=""))
  # initialization
  for(i in 1:ng) {
    iwei <- rep(0,nrow(data))
    iwei[which(data[,unit]%in%cou[which(Z==i)])] <- 1
    imod <- wlsFit(x.names=x.names, data=dataI, t=tval, weights=iwei, d=d)
    reg[[i]] <- imod$fit
    beta[[i]] <- imod$beta
    S[[i]] <- imod$S
    for(w in 1:n) {
      auxw <- which(data[,unit]==cou[w])
      wdat <- as.matrix(dataI[auxw,x.names,drop=F])
      if(np>1) {
        ijD <- sum(logdmN(wdat, Xmat%*%beta[[i]], S[[i]]))
        } else {
        ijD <- sum(dnorm(dataI[auxw,x.names], Xmat%*%beta[[i]], sqrt(S[[i]]),log=T))
        }
      postd[w,i] <- pldBound(ijD+log(p0[i]))
      }
    }
  ll_old <- sum(log(apply(exp(postd),1,sum)))
  fine <- count <- count2 <- 0
  if(quiet==F) {
    mstr <- paste0(mstr0,"EM iteration ",count,". Log likelihood: ",round(ll_old,4))
    nch <- nchar(mstr)
    if(nch<nch0) {
      estr <- rep(" ",nch0-nch)
      } else {
      estr <- ""
      }
    cat('\r',mstr,estr)
    flush.console()
    } else {
    nch <- 0  
    }
  p0_old <- p0
  beta_old <- beta
  S_old <- S
  reg_old <- reg
  # begin EM cycle
  while(fine==0) {
    count <- count+1
    # e-step: compute posterior
    for(i in 1:nrow(postd)) {
      postp[i,] <- exp(postd[i,])/sum(exp(postd[i,]))
      }
    Z_old <- Z
    Z <- apply(postp,1,which.max)
    # m-step: calcolo prior, beta, sigma
    p0 <- colMeans(postp)
    for(i in 1:ng) {
      iwei <- c()
      for(j in 1:length(cou)) {
        iwei[which(data[,unit]==cou[j])] <- postp[j,i]
        }
      if(ismiss) {
        idat <- dataI_list[[i]] <- imputFun(xdat=data[,x.names,drop=F], tval=tval, beta=beta[[i]], S=S[[i]], d=d)
        } else {
        idat <- data[,x.names,drop=F]
        }
      imod <- wlsFit(x.names=x.names, data=idat, t=tval, weights=iwei, d=d)
      reg[[i]] <- imod$fit
      beta[[i]] <- imod$beta
      S[[i]] <- imod$S
      for(w in 1:n) {
        auxw <- which(data[,unit]==cou[w])
        wdat <- as.matrix(idat[auxw,x.names,drop=F])
        if(np>1) {
          ijD <- sum(logdmN(wdat, Xmat%*%beta[[i]], S[[i]]))
          } else {
          ijD <- sum(dnorm(wdat, Xmat%*%beta[[i]], sqrt(S[[i]]),log=T))
          }
        postd[w,i] <- pldBound(ijD+log(p0[i]))
        }
      }
    # compute log likelihood
    ll_new <- sum(log(apply(exp(postd),1,sum)))
    # check convergence
    if(ll_new>=ll_old) {
      if(ll_new-ll_old<tol) fine <- 1
      } else {
      #print(paste0("Likelihood decreased: ",ll_old," -> ",ll_new))
      p0 <- p0_old
      beta <- beta_old
      S <- S_old
      reg <- reg_old
      ll_new <- ll_old
      count <- count-1
      fine <- 1
      }
    if(quiet==F) {
      mstr <- paste0(mstr0,"EM iteration ",count,". Log likelihood: ",round(ll_new,4))
      nch <- nchar(mstr)
      if(nch<nch0) {
        estr <- rep(" ",nch0-nch)
        } else {
        estr <- ""
        }
      cat('\r',mstr,estr)
      flush.console()
      nch0 <- nch
      }
    if(count>=maxit) fine <- 1
    if(sum(Z_old-Z)==0) {
      count2 <- count2+1
      if(ll_new-ll_old<tol*sqrt(count2)) fine <- 1
      } else {
      count2 <- 0
      }
    if(fine==0) {
      ll_old <- ll_new
      p0_old <- p0
      beta_old <- beta
      S_old <- S
      reg_old <- reg
      postp_old <- postp
      }
    }
  # end EM cycle
  for(i in 1:nrow(postd)) {
    postp[i,] <- exp(postd[i,])/sum(exp(postd[i,]))
    }
  Z <- apply(postp,1,which.max)
  assL <- vector("list",length=ng)
  for(i in 1:ng) {
    assL[[i]] <- sort(names(which(Z==i)))
    }
  # imputed data
  if(ismiss) {
    auxdat <- dataI_list[[1]][,x.names,drop=F]*p0[1]
    if(length(dataI_list)>1) {
      for(i in 2:length(dataI_list)) {
        auxdat <- auxdat+dataI_list[[i]][,x.names,drop=F]*p0[i]
        }
      }
    newdat <- data.frame(data[,c(unit,time)],auxdat)
    } else {
    newdat <- data[,c(unit,time,x.names)]
    }
  if(pruning) {
    mpruned <- pruneFun(beta=beta, posterior=postp, data=newdat)
    reg <- mpruned$reg
    beta <- mpruned$beta
    Sigma <- mpruned$S
    ll_new <- mpruned$logLik
    }
  npar <- nparCalc(beta)
  ic <- icCalc(ll_new, npar=npar, ss=nrow(newdat))
  pred <- vector("list",length=ng)
  names(pred) <- 1:ng
  for(i in 1:ng) {
    pred[[i]] <- data.frame(Xmat%*%beta[[i]])
    rownames(pred[[i]]) <- tnam
    colnames(pred[[i]]) <- x.names
    }
  names(p0) <- names(assL) <- colnames(postp) <- names(beta) <- names(S) <- names(reg) <- 1:ng
  datOK <- data[,c(unit,time,x.names)]
  rownames(datOK) <- NULL
  res <- list(call=list(x.names=x.names,unit=unit,time=time,ng=ng,d=d),
              prior=p0, beta=beta, Sigma=S,
              posterior=round(postp,4), Xmat=Xmat, fitted=pred, reg=reg,
              assign=Z, assign.list=assL,
              logLik=ll_new, npar=npar, ic=ic, appa=appaCalc(postp),
              data.orig=NULL, data.norm=newdat, data.imputed=NULL,
              em=c(n.iter=count,converged=ifelse(count>=maxit,0,1)),
              #initial=Z0,
              nch=nch)
  class(res) <- "gbmt"
  res
  }

# fit gbmt
gbmt <- function(x.names,unit,time,ng=1,d=2,data,scaling=2,pruning=TRUE,nstart=NULL,tol=1e-4,maxit=1000,quiet=FALSE) {
  #
  if(missing(data)) stop("Missing argument 'data'")
  if(!identical(class(data), "data.frame")) stop("Argument 'data' must be a data.frame")
  #
  if(missing(unit)) stop("Missing argument 'unit'")
  if(!is.character(unit) || length(unit)!=1) stop("Argument 'unit' must be a character vector of length 1")
  if((unit%in%colnames(data))==F) stop("Variable '",unit,"' not found",sep="")
  if(sum(is.na(data[,unit]))>0) stop("Variable '",unit,"' cannot contain missing values")
  #
  if(missing(time)) stop("Missing argument 'time'")
  if(!is.character(time) || length(time)!=1) stop("Argument 'time' must be a character vector of length 1")
  if((time%in%colnames(data))==F) stop("Variable '",time,"' not found",sep="")
  if(!is.numeric(data[,time]) & inherits(data[,time],'Date')==F) stop("Variable '",time,"' must be either numeric or date")
  if(sum(is.na(data[,time]))>0) stop("Variable '",time,"' cannot contain missing values")
  #
  if(missing(x.names)) stop("Missing argument 'x.names'")
  if(!is.character(x.names) || length(x.names)<1) stop("Argument 'x.names' must be a character vector of length 1 or greater")
  auxchk <- setdiff(x.names, colnames(data))
  if(length(auxchk)>0) stop("Variable '",auxchk[1],"' not found",sep="")
  for(i in 1:length(x.names)) {
    if(!is.numeric(data[,x.names[i]])) {
      stop("Variable '",x.names[i],"' is not numeric",sep="")      
      }
    if(sum(!is.na(data[,x.names[i]]))==0) {
      stop("Variable '",x.names[i],"' is completely missing",sep="")      
      }
    }
  if(!is.numeric(scaling)) {
    scaling <- 0
    } else {
    auxscal <- intersect(scaling,0:4)
    if(length(auxscal)==0) auxscal <- 0 else auxscal <- auxscal[1]
    scaling <- auxscal
    }
  if(scaling %in% 3:4) {
    for(i in 1:length(x.names)) {
      if(sum(data[,x.names[i]]<=0,na.rm=T)>0) stop("Variable '",x.names[i],"' contains zero or negative values: scaling=",scaling," cannot be applied",sep="")
      }
    }
  if(!is.numeric(ng) || length(ng)!=1 || ng<=0 || ng!=round(ng)) stop("Argument 'ng' must be a positive integer number")
  if(!is.numeric(d) || length(d)!=1 || d<=0 || d!=round(d)) stop("Argument 'd' must be a positive integer number")
  if(!is.null(nstart) && (length(nstart)!=1 || nstart<=0 || nstart!=round(nstart))) stop("Argument 'nstart' must be either NULL or a positive integer number")
  if(!is.numeric(tol) || length(tol)!=1 || tol<=0) stop("Argument 'tol' must be a positive number")
  if(!is.null(maxit) && (length(maxit)!=1 || maxit<=0 || maxit!=round(maxit))) stop("Argument 'maxit' must be a positive integer number")
  if(!is.logical(pruning)) pruning <- T else pruning <- pruning[1]
  if(!is.logical(quiet)) quiet <- F else quiet <- quiet[1]
  # sort data by time
  data[,unit] <- factor(data[,unit])
  cou <- levels(data[,unit])
  for(i in 1:length(cou)) {
    ind <- which(data[,unit]==cou[i])
    idat <- data[ind,]
    if(sum(duplicated(idat[,time]))>0) stop("Unit '",cou[i],"' has duplicated time points")
    data[ind,] <- idat[order(idat[,time]),]
    }
  # rescaling
  dataOK <- data
  for(i in 1:length(cou)) {
    ind <- which(data[,unit]==cou[i])
    for(j in 1:length(x.names)) {
      ijdat <- data[ind,x.names[j]]
      ijpar <- c(mean(ijdat,na.rm=T),sd(ijdat,na.rm=T))
      if(is.na(ijpar[1])) ijpar[1] <- mean(data[,x.names[j]],na.rm=T)
      if(is.na(ijpar[2])|ijpar[2]==0) ijpar[2] <- sd(data[,x.names[j]],na.rm=T)
      dataOK[ind,x.names[j]] <- scaleFun(ijdat, scaling=scaling, par=ijpar)
      }
    }
  if(!is.null(nstart)) {
    mOK <- gbmtFit(x.names=x.names,unit=unit,time=time,ng=ng,d=d,data=dataOK,pruning=pruning,tol=tol,maxit=maxit,init=NULL,quiet=quiet,id_restart=1,nch0=0)
    res <- c(logLik=mOK$logLik,mOK$em)
    #iniMat <- rbind(mOK$initial)
    #finMat <- rbind(mOK$assign)
    nch0 <- mOK$nch
    if(nstart>1) {
      for(i in 2:nstart) {
        m0 <- gbmtFit(x.names=x.names,unit=unit,time=time,ng=ng,d=d,data=dataOK,pruning=pruning,tol=tol,maxit=maxit,init=NULL,quiet=quiet,id_restart=i,nch0=nch0)
        res <- rbind(res,(c(logLik=m0$logLik,m0$em)))
        #iniMat <- rbind(iniMat,m0$initial)
        #finMat <- rbind(finMat,m0$assign)
        if(m0$logLik>mOK$logLik) mOK <- m0
        nch0 <- m0$nch
        }
      }
    } else {
    ini0 <- rep(1:ng,length=length(cou))
    names(ini0) <- sort(cou)
    mOK <- gbmtFit(x.names=x.names,unit=unit,time=time,ng=ng,d=d,data=dataOK,pruning=pruning,tol=tol,maxit=maxit,init=ini0,quiet=quiet,id_restart=NULL,nch0=0)
    res <- rbind(c(logLik=mOK$logLik,mOK$em))
    #iniMat <- rbind(mOK$initial)
    #finMat <- rbind(mOK$assign)
    }
  if(quiet==F) cat("\n")
  rownames(res) <- NULL
  mOK$data.orig <- data[,c(unit,time,x.names)]
  if(sum(is.na(data[,x.names]))>0) { 
    mu <- do.call(rbind,lapply(split(data,data[,unit]),function(x){
      apply(x[,x.names,drop=F],2,mean,na.rm=T)
      }))
    dataI <- mOK$data.norm
    for(i in 1:length(cou)) {
      ind <- which(data[,unit]==cou[i])
      for(j in 1:length(x.names)) {
        ijdat <- data[ind,x.names[j]]
        ijpar <- c(mean(ijdat,na.rm=T),sd(ijdat,na.rm=T))
        if(is.na(ijpar[1])) ijpar[1] <- mean(data[,x.names[j]],na.rm=T)
        if(is.na(ijpar[2])|ijpar[2]==0) ijpar[2] <- sd(data[,x.names[j]],na.rm=T)
        dataI[ind,x.names[j]] <- scaleFun_inv(mOK$data.norm[ind,x.names[j]], scaling=scaling, par=ijpar)
        }
      }
    #
    mOK$data.imputed <- dataI
    } else {
    mOK$data.imputed <- data[,c(unit,time,x.names)]
    }
  mOK$em <- res
  mOK$call$scaling <- scaling
  mOK$call$pruning <- pruning
  mOK$call$nstart <- nstart
  mOK$call$tol <- tol
  mOK$call$maxit <- maxit
  #rownames(iniMat) <- rownames(finMat) <- NULL
  #mOK$initial <- iniMat
  #mOK$final <- finMat
  mOK$nch <- NULL
  mOK
  }

# print method
print.gbmt <- function(x, ...) {
  gdis <- table(factor(x$assign,levels=1:x$call$ng))
  for(i in 1:x$call$ng) {
    cat("Group ",i,": ",gdis[i]," units (APPA: ",round(x$appa[i],4),")",sep="","\n")
    print(t(x$beta[[i]]))
    if(i<length(gdis)) cat("\n")
    }
  }

# summary method
summary.gbmt <- function(object, ...) {
  suppressWarnings(lapply(object$reg, function(x){
    lapply(x, summary, ...)
    }))
  }

# coef method
coef.gbmt <- function(object, ...) {
  object$beta
  }

# residuals method
residuals.gbmt <- function(object, ...) {
  lapply(object$reg, function(x) {
    data.frame(
      object$data.orig[,c(object$call$unit,object$call$time)],
      do.call(cbind,lapply(x,residuals))
      )
    })
  }

# fitted method
fitted.gbmt <- function(object, ...) {
  lapply(object$reg, function(x) {
    data.frame(
      object$data.orig[,c(object$call$unit,object$call$time)],
      do.call(cbind,lapply(x,fitted))
      )
    })
  }

# logLik method
logLik.gbmt <- function(object, ...) {
  ll <- object$logLik
  attr(ll,"df") <- nparCalc(object$beta)
  class(ll) <- "logLik"
  ll
  }

# extractAIC method
extractAIC.gbmt <- function(fit, scale, k=2, ...) {
  npar <- nparCalc(fit$beta)
  c(npar, -2*fit$logLik+k*npar)
  }

# AIC method
AIC.gbmt <- function(object, k=2, ...) {
  -2*object$logLik+k*nparCalc(object$beta)
  }

# BIC method
BIC.gbmt <- function(object, ...) {
  -2*object$logLik+nparCalc(object$beta)*log(nrow(object$data.imputed))
  }

# predict method
predict.gbmt <- function(object, unit=NULL, n.ahead=0, bands=TRUE, conf=0.95, in.sample=FALSE, ...) {
  if(!is.null(unit)) {
    if(length(unit)>1) stop("Argument 'unit' must be of length 1")
    unitOK <- levels(factor(object$data.orig[,object$call$unit]))
    if((unit%in%unitOK)==F) stop("Unknown unit '",unit,"'")
    }
  if(!is.logical(bands)) bands <- T else bands <- bands[1]
  if(!is.logical(in.sample)) in.sample <- F else in.sample <- in.sample[1]
  #
  if(!is.numeric(n.ahead)) stop("Argument 'n.ahead' must be a non-negative integer number")
  n.ahead <- max(n.ahead,na.rm=T)
  if(n.ahead<0 || n.ahead!=round(n.ahead)) stop("Argument 'n.ahead' must be a non-negative integer number")
  #
  ## <-- gestire n.ahead vettore
  #
  if(identical(n.ahead,0) && in.sample==F) in.sample <- T
  if(bands) {
    if(length(conf)>1) conf <- conf[1]
    if(!is.numeric(conf) || conf<0 || conf>=1) stop("Argument 'conf' must be a non-negative value less than 1")
    plab <- signif((1-conf)/2*100,2)
    }
  reg <- object$reg
  Xmat <- object$Xmat
  t0 <- min(object$data.orig[,object$call$time])-1
  nt <- max(Xmat[,2])
  tdat <- c()
  if(in.sample || identical(n.ahead,0)) tdat <- data.frame(t=Xmat[,2])
  if(n.ahead>0) tdat <- data.frame(t=c(tdat$t,(nt+1):(nt+n.ahead)))
  if(is.numeric(object$data.orig[,object$call$time])) {
    tnam <- t0+tdat$t
    } else {
    tnam <- as.Date(tdat$t,t0)
    }
  rownames(tdat) <- tnam
  pr <- list()
  for(i in 1:length(reg)) {
    suppressWarnings(
      if(bands) {
        ipr <- lapply(reg[[i]], function(z) {
          m <- predict.lm(z, newdata=tdat, interval="prediction", level=conf)
          colnames(m) <- c("mean",paste0(c(plab,100-plab),"%"))
          m
          })
        } else {
        ipr <- lapply(reg[[i]], function(z) {
          m <- as.matrix(predict.lm(z, newdata=tdat))
          colnames(m) <- "mean"
          m
          })
        }
      )
    pr[[i]] <- ipr
    }
  names(pr) <- names(reg)
  if(!is.null(unit)) {
    ind <- which(object$data.orig[,object$call$unit]==unit)
    wei <- object$posterior[unit,]
    dat <- object$data.imputed
    pr_new <- pr[[1]]
    for(i in 1:length(pr_new)) pr_new[[i]][] <- 0
    for(i in 1:length(pr_new)) {
      ipar <- c(mean(dat[ind,names(pr_new)[i]],na.rm=T),
                sd(dat[ind,names(pr_new)[i]],na.rm=T))
      if(is.na(ipar[1])) ipar[1] <- mean(dat[,names(pr_new)[i]],na.rm=T)
      if(is.na(ipar[2])|ipar[2]==0) ipar[2] <- sd(dat[,names(pr_new)[i]],na.rm=T)
      for(j in 1:length(pr)) {
        pr_new[[i]] <- pr_new[[i]]+wei[j]*scaleFun_inv(pr[[j]][[i]], scaling=object$call$scaling, par=ipar)
        }
      }
    pr_new
    } else {
    pr  
    }
  }

# check color (auxiliary)
areColors <- function(x) {
  sapply(x, function(y) {
    tryCatch(is.matrix(col2rgb(y)), error=function(e) FALSE)
    })
  }

# create transparent color (auxiliary)
t_col <- function(color, percent=50, name=NULL) {
  rgb.val <- col2rgb(color)
  percent <- max(0,min(100,percent[1]))
  tcol <- rgb(rgb.val[1], rgb.val[2], rgb.val[3], maxColorValue=255,
                alpha=(100-percent)*255/100, names=name)
  invisible(tcol)
  }

# plot method
plot.gbmt <- function(x, group=NULL, unit=NULL, x.names=NULL, n.ahead=0, bands=TRUE, conf=0.95, observed=TRUE, equal.scale=FALSE, trim=0,ylim=NULL, xlab="", ylab="",
                      titles=NULL, add.grid=TRUE, col=NULL, transparency=-1, add.legend=TRUE, pos.legend=c(0,0), cex.legend=0.6, mar=c(5.1,4.1,4.1,2.1), ...) {
  if(is.null(unit) && !is.null(group)) {
    if((group %in% names(x$reg))==F & (group %in% 1:length(x$reg))==F) stop("Invalid argument 'group': valid values are 1, ..., ",length(x$beta))
    }
  if(!is.null(unit)) {
    if(length(unit)>1) stop("Argument 'unit' must be of length 1")
    unitOK <- levels(factor(x$data.orig[,x$call$unit]))
    if((unit%in%unitOK)==F) stop("Unknown unit '",unit,"'")
    }
  if(is.numeric(n.ahead)) n.ahead <- max(n.ahead,na.rm=T)
  if(!is.numeric(n.ahead) || n.ahead<0 || n.ahead!=round(n.ahead)) stop("Argument 'n.ahead' must be a non-negative integer number")
  if(!is.logical(bands)) bands <- T else bands <- bands[1]
  if(!is.logical(add.grid)) add.grid <- T else add.grid <- add.grid[1]
  if(bands) {
    if(length(conf)>1) conf <- conf[1]
    if(!is.numeric(conf) || conf<0 || conf>=1) stop("Argument 'conf' must be a non-negative value less than 1")
    if(!is.numeric(transparency)) transparency <- -1
    transparency <- transparency[1]
    }
  pr <- predict.gbmt(x, group=NULL, unit=unit, n.ahead=n.ahead, bands=bands, conf=conf, in.sample=T)
  xnam <- xall <- x$call$x.names
  if(!is.null(x.names)) {
    auxchk <- setdiff(x.names,xall)
    if(length(auxchk)>0) stop("Unknown variable '",auxchk[1],"' in argument 'x.names'")
    xnam <- x.names
    }
  if(length(xnam)>1) {
    #mar0 <- par()$mar
    #mfrow0 <- par()$mfrow
    #on.exit(par(mar=mar0,mfrow=mfrow0))
    oldpar <- par(no.readonly=T)
    on.exit(par(oldpar)) 
    }
  if(!is.logical(observed)) observed <- T else observed <- observed[1]
  if(!is.logical(equal.scale)) equal.scale <- F else equal.scale <- equal.scale[1]
  if(equal.scale) {
    if(!is.numeric(trim)) trim <- 0
    trim <- trim[1]
    if(trim<0) trim <- 0
    if(trim>1) trim <- 1
    }
  auxcol <- 1:length(x$reg)
  if(!is.null(col)) {
    auxcol <- areColors(col)
    col <- col[which(auxcol)]
    if(length(col)==0) col <- NULL
    }
  if(is.null(col)) {
    if(is.null(group)&is.null(unit)) {
      col <- auxcol+1
      } else {
      col <- 1
      }
    }
  if(!is.numeric(ylim)||length(ylim)<2) ylim <- NULL else ylim <- ylim[1:2]
  if(!is.null(ylim)&&sum(abs(ylim)==Inf,na.rm=T)>0) ylim <- NULL
  Xmat <- x$Xmat
  nt <- max(Xmat[,2])
  t0 <- min(x$data.orig[,x$call$time])-1
  auxseq <- seq(1,nt+n.ahead,length=100)
  tseq <- t0+auxseq
  if(is.numeric(t0)) {
    tlim <- t0+c(1,nt+n.ahead)
    } else {
    tlim <- as.Date(c(1,nt+n.ahead),t0)
    }
  if(is.null(unit) & !is.null(group)) {
    dat <- x$data.norm
    cou <- x$assign.list[[group]]
    ind <- which(dat[,x$call$unit]%in%cou)
    ly <- list()
    for(i in 1:length(xnam)) {
      if(is.null(ylim)) {
        if(observed) {
          if(equal.scale==T) {
            ily1 <- range(pr)
            ily2 <- quantile(as.matrix(dat[,xall],prob=c(trim/2,1-trim/2)))
            } else {
            ily1 <- range(pr[[group]][[xnam[i]]])
            ily2 <- quantile(dat[ind,xnam[i]],prob=c(trim/2,1-trim/2))
            }
          ly[[i]] <- c(min(ily1[1],ily2[1]),max(ily1[2],ily2[2]))
          } else {
          if(equal.scale==T) {
            ly[[i]] <- range(pr)
            } else {
            ly[[i]] <- range(pr[[group]][[xnam[i]]])
            }
          }
        } else {
        ly[[i]] <- ylim
        }
      }
    if(length(xnam)>1) par(mar=mar,mfrow=n2mfrow(length(xnam)))
    for(i in 1:length(xnam)) {
      itit <- ifelse(is.null(titles),xnam[i],titles[i])
      plot(t0,0,type="n",ylim=ly[[i]],xlim=tlim,xlab=xlab,ylab=ylab,main=itit,...)
      if(add.grid) grid()
      if(observed) {
        for(j in 1:length(cou)) {
          ijind <- which(dat[,x$call$unit]==cou[j])
          ijt <- Xmat[as.character(dat[ijind,x$call$time]),2]
          lines(t0+ijt,dat[ijind,xnam[i]],col="grey70")
          }
        }
      if(bands) {
        suppressWarnings(
          ibands <- predict.lm(x$reg[[group]][[xnam[i]]], newdata=data.frame(t=auxseq), interval="prediction", level=conf)
          )
        if(transparency>=0) {
          polygon(c(tseq,rev(tseq)), c(ibands[,2],rev(ibands[,3])),
            border=NA, col=t_col(col[1],percent=transparency))
          } else {
          lines(tseq, ibands[,2], lty=2, col=col[1])
          lines(tseq, ibands[,3], lty=2, col=col[1])
          }  
        lines(tseq, ibands[,1], lwd=2, col=col[1])
        } else {
        suppressWarnings(
          ipr <- predict.lm(x$reg[[group]][[xnam[i]]],newdata=data.frame(t=auxseq))
          )
        lines(tseq, ipr, lwd=2, col=col[1])
        }
      if(n.ahead>0) abline(v=t0+nt, lty=2)
      box()
      }
    } else if(!is.null(unit)) {
    dat <- x$data.imputed
    cou <- unit
    wei <- x$posterior[unit,]
    ly <- list()
    ind <- which(dat[,x$call$unit]%in%unit)
    for(i in 1:length(xnam)) {
      if(is.null(ylim)) {
        if(observed) {
          ily1 <- range(pr[[xnam[i]]])
          ily2 <- quantile(dat[ind,xnam[i]],prob=c(trim/2,1-trim/2))
          ly[[i]] <- c(min(ily1[1],ily2[1]),max(ily1[2],ily2[2]))
          } else {
          ly[[i]] <- range(pr[[xnam[i]]])
          }
        } else {
        ly[[i]] <- ylim
        }
      }
    if(length(xnam)>1) par(mar=mar,mfrow=n2mfrow(length(xnam)))
    for(i in 1:length(xnam)) {
      itit <- ifelse(is.null(titles),xnam[i],titles[i])
      plot(t0,0,type="n",ylim=ly[[i]],xlim=tlim,xlab=xlab,ylab=ylab,main=itit,...)
      if(add.grid) grid()
      if(observed) {
        for(j in 1:length(cou)) {
          ijind <- which(dat[,x$call$unit]==cou[j])
          ijt <- Xmat[as.character(dat[ijind,x$call$time]),2]
          lines(t0+ijt,dat[ijind,xnam[i]],col="grey70")
          }
        }
      ipar <- c(
        mean(dat[which(dat[,x$call$unit]==unit),xnam[i]],na.rm=T),
        sd(dat[which(dat[,x$call$unit]==unit),xnam[i]],na.rm=T)
        )
      if(is.na(ipar[1])) ipar[1] <- mean(dat[,xnam[i]],na.rm=T)
      if(is.na(ipar[2])|ipar[2]==0) ipar[2] <- sd(dat[,xnam[i]],na.rm=T)
      if(bands) {
        ibandsL <- list()
        for(j in 1:length(x$reg)) {
          suppressWarnings(
            ibandsL[[j]] <- wei[j]*predict.lm(x$reg[[j]][[xnam[i]]], newdata=data.frame(t=auxseq), interval="prediction", level=conf)
            )
          }
        ibands_mix <- ibandsL[[1]]
        ibands_mix[] <- 0
        for(j in 1:length(ibandsL)) {
          ibands_mix <- ibands_mix+ibandsL[[j]]
          }
        ibands <- apply(ibands_mix, 2, scaleFun_inv, scaling=x$call$scaling, par=ipar)
        if(transparency>=0) {
          polygon(c(tseq,rev(tseq)), c(ibands[,2],rev(ibands[,3])),
            border=NA, col=t_col(col[1],percent=transparency))
          } else {
          lines(tseq, ibands[,2], lty=2, col=col[1])
          lines(tseq, ibands[,3], lty=2, col=col[1])
          }  
        lines(tseq, ibands[,1], lwd=2, col=col[1])
        } else {
        iprL <- list()
        for(j in 1:length(x$reg)) {
          suppressWarnings(
            iprL[[j]] <- wei[j]*predict.lm(x$reg[[j]][[xnam[i]]], newdata=data.frame(t=auxseq))
            )
          }
        ipr_mix <- iprL[[1]]
        ipr_mix[] <- 0
        for(j in 1:length(iprL)) {
          ipr_mix <- ipr_mix+iprL[[j]]
          }
        ipr <- scaleFun_inv(ipr_mix, scaling=x$call$scaling, par=ipar)
        lines(tseq, ipr, lwd=2, col=col[1])
        }
      if(n.ahead>0) abline(v=t0+nt, lty=2)
      box()
      }
    } else {
    if(!is.logical(add.legend)) add.legend <- T else add.legend <- add.legend[1]
    if(add.legend) {
      if(!is.numeric(pos.legend)) cex.legend <- c(0,0)
      pos.legend <- unname(c(pos.legend,0)[1:2])
      if(!is.numeric(cex.legend)) cex.legend <- 0.6
      cex.legend <- cex.legend[1]
      }
    dat <- x$data.norm
    cou <- unlist(x$assign.list)
    ind <- 1:nrow(dat)
    ly <- list()
    for(i in 1:length(xnam)) {
      if(is.null(ylim)) {
        if(equal.scale) {
          ly[[i]] <- range(pr)
          } else {
          ly[[i]] <- range(lapply(pr,function(z){z[[xnam[i]]]}))
          }
        } else {
        ly[[i]] <- ylim
        }
      }
    if(length(xnam)>1) par(mar=mar,mfrow=n2mfrow(length(xnam)))
    for(i in 1:length(xnam)) {
      itit <- ifelse(is.null(titles),xnam[i],titles[i])
      plot(t0,0,type="n",ylim=ly[[i]],xlim=tlim,xlab=xlab,ylab=ylab,main=itit,...)
      if(add.grid) grid()
      for(j in 1:length(x$reg)) {
        #if(observed) {
        #  for(k in 1:length(cou[[j]])) {
        #    ind <- which(dat[,x$call$unit]==cou[[j]][k])
        #    ijkt <- Xmat[as.character(dat[ind,x$call$time]),2]
        #    lines(t0+ijkt,dat[ind,xnam[i]],col=t_col(col[j],percent=0.5))
        #    }
        #  }
        if(bands) {
          suppressWarnings(
            ijbands <- predict.lm(x$reg[[j]][[xnam[i]]], newdata=data.frame(t=auxseq), interval="prediction", level=conf)
            )
          if(transparency>=0) {
            polygon(c(tseq,rev(tseq)), c(ijbands[,2],rev(ijbands[,3])),
              border=NA, col=sapply(col[j],t_col,percent=transparency))
            } else {
            lines(tseq, ijbands[,2], lty=2, col=col[j])
            lines(tseq, ijbands[,3], lty=2, col=col[j])
            }
          lines(tseq, ijbands[,1], lwd=2, col=col[j])
          } else {
          suppressWarnings(
            ijpr <- predict.lm(x$reg[[j]][[xnam[i]]],newdata=data.frame(t=auxseq))
            )
          lines(tseq, ijpr, lwd=2, col=col[j])
          }
        }
      if(add.legend) legend("topleft", legend=1:length(x$reg), col=col, lty=1, bty="n", horiz=T, cex=cex.legend, xpd=!identical(pos.legend,0), inset=pos.legend)
      if(n.ahead>0) abline(v=t0+nt, lty=2)
      box()
      }
    }
  #if(!is.null(main)) mtext(main,side=3,outer=T)
  }

# compute posterior probabilities for a new unit
posterior <- function(x, newdata=NULL) {
  if(is.null(newdata)) {
    x$posterior
    } else {
    if(!identical(class(x),"gbmt")) stop("Argument 'x' must be an object of class 'gbmt'")
    if(!identical(class(newdata),"data.frame")) stop("Argument 'newdata' must be NULL or an object of class 'data.frame'")
    xtime <- x$call$time
    if((xtime%in%colnames(newdata))==F) stop("Variable '",xtime,"' not found in 'newdata'")
    xnam <- x$call$x.names
    newdataS <- newdata
    for(i in 1:length(xnam)) {
      if((xnam[i]%in%colnames(newdata))==F) stop("Variable '",xnam[i],"' not found in 'newdata'")
      if(sum(is.na(newdata[,xnam[i]]))>0) stop("Variable '",xnam[i],"' contains missing values")
      }
    reg <- x$reg
    beta <- x$beta
    S <- x$Sigma
    prior <- x$prior
    xuni <- x$call$unit
    if(xuni%in%colnames(newdata)) {
      datList <- split(newdata, as.character(newdata[,xuni]))
      } else {
      datList <- list(newdata)
      }
    res <- list()
    for(w in 1:length(datList)) {
      tval <- as.numeric(datList[[w]][,xtime]-min(x$data.orig[,xtime])+1)
      auxdat <- datList[[w]][,xnam,drop=F]
      for(j in 1:length(xnam)) {
        ijpar <- c(mean(auxdat[,xnam[j]],na.rm=T), sd(auxdat[,xnam[j]],na.rm=T))
        if(is.na(ijpar[1])) ijpar[1] <- mean(x$data.imputed[,xnam[j]],na.rm=T)
        if(is.na(ijpar[2])|ijpar[2]==0) ijpar[2] <- sd(x$data.imputed[,xnam[j]],na.rm=T)
        auxdat[,xnam[j]] <- scaleFun(auxdat[,xnam[j]], scaling=x$call$scaling, par=ijpar)
        }
      pld <- matrix(nrow=nrow(auxdat),ncol=length(beta))
      for(i in 1:length(beta)) {
        imu <- matrix(nrow=nrow(auxdat),ncol=length(xnam))
        for(j in 1:length(xnam)) {
          imu[,j] <- predict.lm(reg[[i]][[xnam[j]]], data.frame(t=tval))
          }
        pld[,i] <- logdmN(auxdat,imu,S[[i]])
        }
      pld_sum <- sapply(apply(pld,2,sum)+log(prior),pldBound)
      pd <- exp(pld_sum)
      res[[w]] <- round(pd/sum(pd),4)
      }
    tab <- data.frame(do.call(rbind,res))
    rownames(tab) <- names(datList)
    colnames(tab) <- 1:length(beta)
    tab
    }
  }
alessandromagrini/gbmt documentation built on Feb. 6, 2022, 10:55 p.m.