R/rglwbysu.R

Defines functions rglwbysu

Documented in rglwbysu

#' @title
#'
#'  Bayesian univariate analysis of AFT model for selected covariates using regularization method.
#'
#' @description Provides posterior Estimates of selected variable(by regularization technique) in AFT model for univariate in high dimensional gene expression data with MCMC. Incorporated variable selection has been done with regularization technique. It also deals covariates with missing values.
#'
#' @details
#' Here weibull distribution has been used for AFT model with MCMC.
#' This function deals covariates (in data) with missing values. Missing value in any column (covariate) is replaced by mean of that particular covariate.
#'
#' @param m Starting column number of covariates of study from high dimensional entered data.
#' @param n Ending column number of covariates of study from high dimensional entered data.
#' @param STime name of survival time in data
#' @param Event name of event in data. 0 is for censored and 1 for occurrence of event.
#' @param nc number of chain used in model.
#' @param ni number of iteration used in model.
#' @param alpha It is chosen value between 0 and 1 to know the regularization method. alpha=1 for Lasso,
#' alpha=0 for Ridge and alpha between 0 and 1 for elastic net regularization.
#' @param data High dimensional gene expression data that contains event status, survival time and and set of covariates.
#' @return  posterior estimates (Coef, SD, Credible Interval) of regression coefficient for all selected covariate (using regularization) in model and deviance.
#' @import R2jags
#' @import survival
#' @import glmnet
#'
#' @references Prabhash et al(2016) <doi:10.21307/stattrans-2016-046>
#'
#' @examples
#' ##
#' data(hdata)
#' set.seed(1000)
#' rglwbysu(9,45,STime="os",Event="death",2,10,1,hdata)
#' ##
#'
#' @export
#' @author Atanu Bhattacharjee, Gajendra Kumar Vishwakarma and Pragya Kumari
#' @seealso wbysuni,wbysmv, rglwbysm
#'

 rglwbysu=function(m,n,STime,Event,nc,ni,alpha,data){
  a<-alpha
  nr<-nrow(data)

  if(STime!="os"){
    names(data)[names(data) == STime] <- "os"
  }
  if(Event!="death"){
    names(data)[names(data) == Event] <- "death"
  }

  d11 <- subset(data, select = c(m:n))
  le<-length(d11)
  for(i in 1:nr) {
    for(j in 1:le) {
      d11[i,j] = ifelse(is.na(d11[i,j])=="TRUE", mean(d11[,j], na.rm=TRUE), d11[i,j])
    }
  }
  pnt<-NULL
  for(j in 1:le)
  {
    if(sum(d11[,j])==0) {
      pnt<-c(pnt,j)
    }
  }
  if(is.null(pnt)==F){
    d11 <- d11[,-pnt]
  }
  len<-length(d11)
  os<-data$os
  death<-data$death
  x<-as.matrix(d11)
  #set.seed(5000)
  cv.lassoVar<-cv.glmnet(x,Surv(os,death),alpha = a,family = "cox", maxit=1000)

  var_minlmd = coef(cv.lassoVar, cv.lassoVar$lambda.min)
  p <- as.matrix(var_minlmd)
  sg<-subset(p,p[,1]!=0)

  if(nrow(sg)==0){
    stop("Increase number of variables, chosen are not sufficient for variable selection using regularization")
  } else{
    sgnf_var <- rownames(sg)
  }

  ln<-length(sgnf_var)
  pnt1<-NULL
  for(i in 1:ln){
    for(j in 1:len)
    {
      if(colnames(d11[j])==sgnf_var[i]) {
        pnt1<-c(pnt1,j)
      }
    }
  }
  fdt<-subset(d11,select = pnt1)
  d12<-data.frame(data[,c('death','os')],fdt)

  mx<-max(d12$os) + 10
  surt<-ifelse(d12$death == 1, d12$os, NA)
  stcen<-ifelse(d12$death == 0, d12$os, mx)
  d12$os<-surt
  cen<-as.numeric(is.na(surt))
  d12<-data.frame(d12,stcen,cen)

  vv<-fdt
  mtx<-matrix(nrow=ln, ncol = 8)
  colnames(mtx)<-c("coef","SD","2.5%","25%","50%","75%","97.5%","deviance")
  rownames(mtx)<-colnames(fdt)

  for(j in 1:ln){
    data1<-list(os=d12$os, stcen=d12$stcen, cen=d12$cen, v1=vv[,j], N = nr)
    modelj1<-function(){
      for (i in 1:N) {
        sV1[i] <- (v1[i]-mean(v1[]))/sd(v1[])
        os[i] ~ dweib(alpha,lambda[i])
        cen[i] ~ dinterval(os[i],stcen[i])
        lambda[i] <-  log(2)*exp(-mu[i]*sqrt(tau))
        mu[i] <-  beta[1] + beta[2]*sV1[i]
      }
      alpha <- sqrt(tau)

      for(i in 1:2){
        beta[i] ~ dnorm(0,0.000001)
        rm[i] <- exp(beta[i])
        prob[i] <- step(beta[i])
      }
      tau ~ dgamma(0.001,0.001)
      sigma <- sqrt(1/tau)
    }
    inits1 <- function() {
      list(beta=c(0,0), tau=1)
    }
    jagsftt <- jags(model.file=modelj1, data=data1, inits = inits1,
                    parameters.to.save = c('beta','tau','sigma'), n.chains=nc, n.iter = ni)
    f=data.frame(jagsftt$BUGSoutput$summary)
    for(k in 1:7){
      mtx[j,k]<-f[2,k]
    }
    mtx[j,8]<-f[3,1]
   }
    if(nrow(mtx)!=0){
    cat("Estimates for variables:  ", colnames(fdt),"\n")
    return(mtx)
    } else{
    warning("No return value, check for right entry or called for side effects")
    }
}

utils::globalVariables(c("N","v1","sd","tau","step","sd"))

Try the afthd package in your browser

Any scripts or data that you put into this service are public.

afthd documentation built on Oct. 1, 2021, 5:08 p.m.