R/TGLG_continuous_lambda_emu_gamma.R

# TGLG model fitting for a continuous outcome
# Input Arguments:
#   X: input features, dimension n*p, with n the sample size and p the number of features
#   y: response variable, continuous values
#   nsim: total number of MCMC iterations
#   ntune: number of initial MCMC iterations used to tune the proposal variance toward the desired acceptance rate
#   freqTune: how often (in iterations) the proposal variance is tuned
#   nest: number of final MCMC iterations used for estimation
#   nthin: thinning interval for the MCMC chain
#   a.alpha, b.alpha: inverse-gamma prior parameters for sigma_alpha
#   lambda: threshold parameter; fixed (not sampled) in this variant
#   ae0, be0: inverse-gamma prior parameters for the residual variance sigmae
#   lambda.lower, lambda.upper: lower and upper bounds of the uniform prior for lambda
#   emu, esd: mean and standard deviation of the log-normal prior for epsilon
#   seed: random seed
#   confound: optional numeric matrix of confounders, appended to X as extra columns
#   beta.ini.meth: method used to initialize beta, either 'LASSO' or 'Univariate'
#   noise: if TRUE, add Gaussian noise to the initial beta values
#
# Output object:
#   post_summary: a data frame with the selection probability and posterior mean of each beta
#   dat: the X, confound, and y used to generate the results
#   save_mcmc: all MCMC samples saved after burn-in and thinning
#' @export
#' @import glmnet
#' @import mvnfast
#' @import MCMCpack
#' @import pROC
#' @import truncnorm
#' @import Matrix
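#' @examples
#' \dontrun{
#' # A minimal sketch on simulated data; everything below is an illustrative
#' # placeholder, not data or settings shipped with the package.
#' set.seed(1)
#' n <- 100; p <- 20
#' X <- matrix(rnorm(n * p), n, p)
#' beta_true <- c(rep(1, 3), rep(0, p - 3))
#' y <- as.numeric(X %*% beta_true + rnorm(n))
#' fit <- TGLG_continuous_lambda_emu_gamma(X, y, nsim = 3000, ntune = 1000,
#'                                         nest = 1000, nthin = 5)
#' head(fit$post_summary)
#' # trace plot of the residual variance from the saved chain
#' plot(fit$save_mcmc[, "iter"], fit$save_mcmc[, "sigmae"], type = "l")
#' }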
TGLG_continuous_lambda_emu_gamma = function(X, y, nsim=30000, ntune=10000, freqTune=100,
                                            nest=10000, nthin=10, a.alpha=0.01, b.alpha=0.01,
                                            lambda=5, ae0=0.01, be0=0.01, lambda.lower=0,
                                            lambda.upper=10, emu=-5, esd=3,
                                            seed=123, confound=NULL, beta.ini.meth='LASSO',
                                            noise=FALSE){
  #if(X.scale == T) X <- scale(X)
  if(!is.null(confound)){
    if (!all(sapply(as.data.frame(confound), is.numeric))) {
      stop('confounder columns must be numeric')
    }
    if(nrow(X) != nrow(confound)) stop('number of rows of X and confound do not match')
    X_feat <- X
    #confound_scale <- scale(confound)
    p_feat <- ncol(X_feat)
    X <- as.matrix(cbind(X,confound))
    #net <- net + vertices(colnames(confound))
  } else {
    #no confounders supplied: keep a copy of the feature matrix for the returned dat object
    X_feat <- X
  }
  set.seed(seed)
  p = ncol(X) #number of features
  
  #laplacian = laplacian_matrix(net,normalized=TRUE,sparse=FALSE)
  #laplacian_sp <- Matrix(laplacian, sparse = TRUE)
  
  #tau.gamma=0.01/p
  tau.lambda=0.1
  tau.epsilon = 0.5
  #count the number of acceptances during MCMC updates
  #accept.gamma=0
  accept.epsilon=0
  #accept.lambda =0
  #get initial values using LASSO
  #alpha = 0.5
  if(beta.ini.meth == 'LASSO'){
    beta.elas=cv.glmnet(X,y,alpha=1,intercept=FALSE)
    beta.ini=as.numeric(coef(beta.elas,beta.elas$lambda.min))[-1]
  } else if(beta.ini.meth == 'Univariate') {
    if(is.null(confound)) stop("beta.ini.meth = 'Univariate' requires confound")
    #initial beta values for features
    beta.ini.feat <- sapply(1:p_feat, function(k){
      beta_var <- cbind(X_feat[, k], confound)
      fit_temp <- glm(y~., data=data.frame(beta_var))
      unname(fit_temp$coefficients[2])
    })
    #initial beta values for confounders
    # beta.ini.conf <- sapply(1:(p-p_feat), function(k){
    #   beta_var <- confound[, c(colnames(confound)[k], colnames(confound)[-k])]
    #   fit_temp <- glm(y~., data=data.frame(beta_var))
    #   summary(fit_temp)
    #   unname(fit_temp$coefficients[2])
    # })
    fit_temp <- glm(y~., data=data.frame(confound))
    #fit_temp_scale <- glm(y~., data=data.frame(confound_scale))
    beta.ini.conf <-  unname(fit_temp$coefficients[-1])
    beta.ini <- c(beta.ini.feat, beta.ini.conf)
  } else {
    stop("beta.ini.meth must be 'LASSO' or 'Univariate'")
  }
  
  if(isTRUE(noise)){
    set.seed(seed)
    noise.add <- rnorm(p, 0, 0.5)
    beta.ini = beta.ini+noise.add
  }
  
  
  #initial value
  sigmae=var(y)#rinvgamma(1,ae0,be0)
  betaini2 = beta.ini[which(beta.ini!=0)]
  if(length(betaini2)==0){
    betaini2=0.001
  }
  #change lambda initial
  # if(lambda.ini == 'median'){
  #   lambda =  median(abs(betaini2))/2
  # } else if (lambda.ini == 'zero') {
  #   lambda = 0
  # } else if (lambda.ini == 'prop') {
  #   beta.abs <- abs(beta.ini.feat)
  #   lambda = quantile(beta.abs, select.prop)
  # } else {
  #   stop('lambda initial incorrect')
  # }
  
  burnin <- nsim-nest
  #gamma =beta.ini
  alpha =beta.ini
  beta=alpha
  #beta = alpha*as.numeric(abs(gamma)>lambda)
  #sigma.alpha = 5
  sigma.alpha=0.1
  #sigma.gamma = b.gamma/a.gamma
  epsilon = emu
  #eigen.value = eigen(laplacian)$values
  #eigen.value = eigen_val(laplacian)
  #Ip = diag(1,nrow(laplacian)) #identity matrix
  #Ip <- .sparseDiagonal(nrow(laplacian))
  
  #eigmat = laplacian+exp(epsilon)*Ip #inverse of graph laplacian matrix
  #eigmat = laplacian_sp+exp(epsilon)*Ip
  
  #matrix to store values for all iterations
  #nmcmc <- nest/nthin
  numrow = nest/nthin
  fgamma=matrix(0,nrow=numrow,ncol=length(beta))
  
  Beta =matrix(0,nrow=numrow,ncol=p)
  Alpha =matrix(0,nrow=numrow,ncol=p)
  #Gamma = matrix(0,nrow=numrow,ncol=p)
  #Lambda=rep(0,numrow)
  #Sigma.gamma=rep(0,numrow)
  Sigma.alpha = rep(0, numrow)
  Epsilon = rep(0, numrow)
  likelihood = rep(0, numrow)
  Sigmae = rep(0, numrow)
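  #indices of the iterations whose draws are stored: the last nest iterations, thinned by nthin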
  iter = seq((nsim-nest+1),nsim,by=nthin)
  #AcceptGammaTune = c()
  #AcceptGamma = rep(0, numrow)
  #SimTime = rep(0, nsim)
  #sim=1
  pb = txtProgressBar(style=3)
  for(sim in 1:nsim){
    #update gamma using MALA
    # curr.gamma=gamma
    # curr.hgamma= curr.gamma^2-lambda^2
    # curr.beta=alpha*as.numeric(abs(curr.gamma)>lambda)
    # yerr = y-X%*%curr.beta
    # curr.diff=crossprod(X, yerr)
    # curr.dgamma = dappro2(curr.hgamma)*curr.gamma
    # 
    # tau.gamma_half <- tau.gamma/2
    # sigmae_twice <- sigmae*2
    # 
    # eigmat_curr.gamma <- eigmat%*%curr.gamma
    # #curr.post=crossprod(yerr)/sigmae*0.5+0.5*crossprod(curr.gamma, crossprod(eigmat, curr.gamma))/sigma.gamma
    # curr.post=sum(yerr^2)/sigmae_twice+0.5*crossprod(curr.gamma, eigmat_curr.gamma)/sigma.gamma
    # new.gamma.mean = curr.gamma + tau.gamma_half*(curr.diff*gamma*curr.dgamma/sigmae - eigmat_curr.gamma/sigma.gamma)
    # new.gamma = new.gamma.mean+rnorm(length(gamma),0, sd=sqrt(tau.gamma))
    # new.gamma = as.numeric(new.gamma)
    # new.hgamma = new.gamma^2-lambda^2
    # new.beta=alpha*as.numeric(abs(new.gamma)>lambda)
    # new.yerr = y-X%*%new.beta
    # new.diff = crossprod(X, new.yerr)
    # new.dgamma = dappro2(new.hgamma)*new.gamma
    # curr.gamma.mean = new.gamma + tau.gamma_half*(new.diff*gamma*new.dgamma/sigmae - eigmat%*%new.gamma/sigma.gamma)
    # 
    # #new.post=crossprod(new.yerr)/sigmae*0.5+0.5*crossprod(new.gamma, crossprod(eigmat, new.gamma))/sigma.gamma
    # new.post=sum(new.yerr^2)/sigmae_twice+0.5*crossprod(new.gamma, crossprod(eigmat, new.gamma))/sigma.gamma
    # curr.adiff = new.gamma- new.gamma.mean
    # curr.den = 0.5*sum(curr.adiff^2)/tau.gamma
    # new.adiff=curr.gamma-curr.gamma.mean
    # new.den =0.5* sum(new.adiff^2)/tau.gamma
    # u=runif(1)
    # post.diff=curr.post+curr.den-new.post-new.den
    # if(post.diff>=log(u)){
    #   accept.gamma=accept.gamma+1
    #   gamma=new.gamma
    # }
    # beta=alpha*as.numeric(abs(gamma)>lambda)
    #update alpha
    
    #actset=which(abs(gamma)>lambda)
    #with the gamma/lambda updates disabled in this variant, every feature stays active
    actset=seq_len(p)
    non.actset=setdiff(1:p,actset)
    actset_len <- length(actset)
    if(length(non.actset)>0 ){
      alpha[non.actset]=rnorm(length(non.actset),mean=0,sd=sqrt(sigma.alpha))
    }
    
    if(actset_len>0){
      if(actset_len > 1) {
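        # Full conditional of alpha over the active set is multivariate normal:
        #   alpha_A | . ~ N(Q^{-1} X_A' y / sigmae, Q^{-1}), with Q = X_A' X_A / sigmae + I / sigma.alpha
        # Draw it from one Cholesky factor Q = R'R: solve R'b = X_A' y / sigmae for
        # the mean part, then solve R alpha_A = Z + b with Z ~ N(0, I).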
        XA=X[,actset]
        premat = crossprod(XA)/sigmae
        diag(premat) <- diag(premat) + (1/sigma.alpha)
        
        premat_chol <- chol(premat)
        mu <- crossprod(XA, y)/sigmae
        #transpose in function
        #b2 <- forwardsolve(premat_chol, mu, transpose = T)
        b <- backsolve(premat_chol, mu, transpose=T)
        Z <- rnorm(actset_len, 0, 1)
        alpha[actset] <- backsolve(premat_chol, Z+b)
        
      } else {
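        #single active coefficient: scalar version of the same conjugate normal draw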
        XA = X[, actset]
        varx = 1/(sum(XA^2)/sigmae + 1/sigma.alpha)
        meanx = varx*sum(XA*y)/sigmae
        alpha[actset] = rnorm(1, mean = meanx, sd = sqrt(varx))
      }
    }
    
    #update sigma.gamma
    # a.gamma.posterior=a.gamma+0.5*p
    # b.gamma.posterior=b.gamma+0.5*crossprod(gamma, eigmat%*%gamma)
    # sigma.gamma=rinvgamma(1,a.gamma.posterior,b.gamma.posterior)
    
    #beta=alpha*as.numeric(abs(gamma)>lambda)
    beta=alpha
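    #conjugate inverse-gamma update for sigma.alpha, the prior variance of alpha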
    a.alpha.posterior = a.alpha + 0.5*p
    b.alpha.posterior = b.alpha + 0.5*sum(alpha^2)
    sigma.alpha = rinvgamma(1, a.alpha.posterior, b.alpha.posterior)
    
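    #conjugate inverse-gamma update for the residual variance sigmae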
    ae=ae0+nrow(X)/2
    be=be0+0.5*sum((y-X%*%beta)^2)
    sigmae=rinvgamma(1,ae,be)
    
    #update epsilon
    # epsilon.new = rnorm(1,mean = epsilon, sd = sqrt(tau.epsilon))
    # sgamma = sum(gamma^2)
    # 
    # sgamma_cal <- sgamma*0.5/sigma.gamma
    # esd_square <- esd^2
    # 
    # elike.curr = 0.5*sum(log(eigen.value+exp(epsilon))) - exp(epsilon)*sgamma_cal - epsilon - 0.5*(epsilon-emu)^2/esd_square
    # eigmat.new = laplacian + exp(epsilon.new)*Ip
    # elike.new = 0.5*sum(log(eigen.value+exp(epsilon.new))) - exp(epsilon.new)*sgamma_cal-epsilon.new- 0.5*(epsilon.new-emu)^2/esd_square
    # eu = runif(1)
    # ediff = elike.new - elike.curr
    # if(ediff>=log(eu)) {
    #   epsilon = epsilon.new
    #   eigmat = eigmat.new
    #   accept.epsilon = accept.epsilon+1
    # }
    
    #update lambda
    # lambda.new=rtruncnorm(1,a=lambda.lower,b=lambda.upper,mean=lambda,sd=sqrt(tau.lambda))
    # #curr.lpost=-crossprod(y-X%*%beta)/sigmae*0.5
    # curr.lpost=-sum((y-X%*%beta)^2)/sigmae*0.5
    # new.beta=alpha*as.numeric(abs(gamma)>lambda.new)
    # new.lpost=-sum((y-X%*%new.beta)^2)/sigmae*0.5
    # dnew = log(dtruncnorm(lambda.new,a=lambda.lower,b=lambda.upper,mean=lambda,sd=sqrt(tau.lambda)))
    # dold = log(dtruncnorm(lambda,a=lambda.lower,b=lambda.upper,mean=lambda.new,sd=sqrt(tau.lambda)))
    # llu=runif(1)
    # ldiff=new.lpost + dold-curr.lpost-dnew
    # if(ldiff>log(llu)){
    #   lambda=lambda.new
    #   beta=new.beta
    #   accept.lambda = accept.lambda+1
    # }
    
    #if(sim<=ntune&sim%%freqTune==0){
      #tau.gamma=adjust_acceptance(accept.gamma/100,tau.gamma,0.5)
      #tau.gamma=adjust_acceptance(accept.gamma/100,tau.gamma,0.3)
      #AcceptGammaTune=c(AcceptGammaTune, accept.gamma/100)
      #tau.lambda=adjust_acceptance(accept.lambda/100,tau.lambda,0.3)
      #tau.epsilon=adjust_acceptance(accept.epsilon/100,tau.epsilon,0.3)
      #accept.gamma=0
      #accept.epsilon=0
      #accept.lambda =0
      
    #}
    if(sim %in% iter){
      idx <- match(sim, iter)
      #Gamma[idx,]=gamma
      Alpha[idx, ] = alpha
      #Sigma.gamma[idx] = sigma.gamma
      Sigma.alpha[idx] = sigma.alpha
      Sigmae[idx] =sigmae
      #Epsilon[idx] = epsilon
      #Lambda[idx]=lambda
      Beta[idx,]=beta
      #Gaussian log-likelihood kernel; the -0.5*n*log(2*pi*sigmae) term is omitted
      likelihood[idx] = -sum((y-X%*%beta)^2)/sigmae*0.5
      #AcceptGamma[idx]=accept.gamma
    }
    
    if(sim>burnin & ((sim-burnin) %% nthin == 0)){
      j=sim-burnin
      #fgamma[j/nthin,]=as.numeric(abs(gamma)>lambda)
      fgamma[j/nthin,]=rep(1, p)
    }
    setTxtProgressBar(pb,sim/nsim)
  }
  
  close(pb)
  #use last nest iterations to do the estimation and thin by nthin
  
  #selection probability for each predictor
  gammaSelProb=apply(fgamma,2,mean)
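  #(identically 1 here, since the thresholding step is disabled and fgamma is all ones)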
  
  hbeta=Beta
  
  gammaSelId=as.numeric(gammaSelProb>0)
  beta.est=rep(0,p)
  beta.select=which(gammaSelId==1)
  for(j in 1:length(beta.select)) {
    colid =beta.select[j]
    beta.est[colid]=mean(hbeta[fgamma[,colid]==1, colid])
  }
  
  post_summary = data.frame(selectProb = gammaSelProb, betaEst = beta.est)
  #iter = seq((nsim-nest+1),nsim,by=nthin)
  save_mcmc = cbind(iter,Beta,
                    Sigma.alpha,Sigmae,
                    likelihood)
  # save_mcmc = list(Beta=cbind(iter,Beta),
  #                  Alpha=cbind(iter,Alpha),
  #                  #Gamma=cbind(iter,Gamma),
  #                  oth=cbind(iter,Sigma.alpha,
  #                            Sigmae,likelihood))
  colnames(save_mcmc) = c("iter",
                          paste("beta",1:p,sep=""),
                          # paste("gamma",1:p,sep=""),
                          # "sigma_gamma",
                          "sigma_alpha","sigmae","loglik")
  
  return(list(post_summary=post_summary, dat = list(X=X_feat,confound=confound,y=y),
              save_mcmc = save_mcmc))
}