R/augSIMEX.R
In augSIMEX: Analysis of Data with Mixed Measurement Error and Misclassification in Covariates

Documented in augSIMEX

augSIMEX<-function(mainformula = formula(data), mismodel = pi|qi~1, meformula = NULL, family = gaussian,
                   data,validationdata,
                   err.var, mis.var, err.true, mis.true, err.mat = NULL, cppmethod = TRUE,
                   repeated = FALSE, repind=list(),
                   subset,  offset, weights, na.action, scorefunction=NULL,
                   lambda = NULL, M = 5, B = 20, nBoot = 50, extrapolation = c("quadratic","linear"),
                   bound = 8, initial = NULL,...)

{ call <- match.call()

  if (!missing(family)){
    ### makesure family is matched
    if (is.character(family))
      family <- get(family, mode = "function", envir = parent.frame())
    if (is.function(family))
      family <- family()
    if (is.null(family$family)) {
      print(family)
      stop("'family' not recognized")
    }
    
    ### Obtain the function in the family object
    variance <- family$variance
    linkinv <- family$linkinv
    mu.eta <- family$mu.eta
    if (!is.function(variance) || !is.function(linkinv))
      stop("'family' argument seems not to be a valid family object",
           call. = FALSE)
    dev.resids <- family$dev.resids
    aic <- family$aic
    
    unless.null <- function(x, if.null) if (is.null(x))
      if.null
    else x
    valideta <- unless.null(family$valideta, function(eta) TRUE)
    validmu <- unless.null(family$validmu, function(mu) TRUE)
    
    if (family$family=="gaussian") {dispersionpar = TRUE} else {
      dispersionpar = FALSE
    }
    ### check if fast approach available
    cppmethod<-fastapproach(family,cppmethod)$fast
    scorefun<-fastapproach(family,cppmethod)$scorefun
  } else {
    if (is.null(scorefunction)) stop("Family is missing and scorefunction is not specified. ")
    sfun <-scorefunction
    scorefun<-score.modifieduser
  }

 

  ### setup the weights and offset
  if (missing(weights)) weights<-NULL
  if (missing(offset))  offset<-NULL

  if ((repeated) & (length(repind)==0) ){
     stop("The repind should be specified for each repeated covariates.")}

  ### make data into design matrix
  if (missing(data)) data <- environment(mainformula)
  if (repeated) {
    if (!missing(repind)) {
      for (i in 1:length(names(repind))){
        if (!names(repind)[i] %in% names(data)) {
          data<-cbind(data,rep(0,dim(data)[1]))
          colnames(data)[dim(data)[2]]<-names(repind)[i]}
        }
    }
  }
  mf <- match.call(expand.dots = FALSE)
  m <- match(c("mainformula", "data", "subset","weights", "na.action", "offset"), names(mf), 0L)
  mf <- mf[c(1L, m)]
  if (!repeated) {names(mf)[2]<-"formula"} else {
    names(mf)[2]<-"formula"
    mf$data <- data
  }
  mf$drop.unused.levels <- TRUE
  mf[[1L]] <- quote(stats::model.frame)
  mf <- eval(mf, parent.frame())
  mt <- attr(mf, "terms")
  xlevels <- .getXlevels(mt, mf)
  Response <- model.response(mf, "any")
  nsize<-length(Response)
  if (length(dim(nsize)) == 1L) {
    nm <- rownames(nsize)
    dim(nsize) <- NULL
    if (!is.null(nm))
      names(nsize) <- nm
  }
  mainCovariates <- if (!is.empty.model(mt))
    model.matrix(mt, mf, contrasts) else matrix(, nsize, 0L)
  contrasts <- attr(mainCovariates, "contrasts")
  if (all(mainCovariates[,1]==1)) intercept<-TRUE
  weights <- as.vector(model.weights(mf))
  if (!is.null(weights) && !is.numeric(weights))
    stop("'weights' must be a numeric vector")
  if (!is.null(weights) && any(weights < 0))
    stop("negative weights not allowed")
  offset <- as.vector(model.offset(mf))
  if (!is.null(offset)) {
    if (length(offset) != nsize)
      stop(gettextf("number of offsets is %d should equal %d (number of observations)",
                    length(offset), nsize), domain = NA)
  }else {offset<- rep.int(0, nsize)}
  if (is.null(weights)) weights <- rep.int(1, nsize)
   else { if (length(weights) != nsize)
     stop(gettextf("number of weights is %d should equal %d (number of observations)",
                       length(weights), nsize), domain = NA)}
  qr <- qr(mainCovariates)

  ### rearrange the validationData
  val.covariates<-c(mis.var,err.var,err.true, mis.true)
  if (missing(validationdata)){ validationdata<-mget(unique(val.covariates),envir=parent.frame())}
  if (!repeated){
    if (!all(val.covariates %in% names(validationdata)))
      {stop(paste("Variable",
                   val.covariates[!val.covariates %in% names(validationdata)],
                   "is not included in the validation data."))}}
   else {if (!all(c(mis.var,mis.true) %in% names(validationdata))) {
     stop(paste("Variable",
                mis.var, "or", mis.true,
                "is not included in the validation data."))
   }}
  colname<-colnames(data)

  ## SIMEXvariable
  err.var = unique(err.var)
  nerr.var = length(err.var)
  mis.var = unique(mis.var)
  nmis.var = length(mis.var)
  if (!is.character(err.var) | nerr.var > length(colname)) {
    stop("Invalid err.var object")
  }
  if (!is.character(mis.var) | nmis.var > length(colname)) {
    stop("Invalid mis.var object")
  }
  if (!all(err.var %in% colname)) {
    stop("Error-prone variable must be selected from the data")
  }
  if (!all(mis.var %in% colname)) {
    stop("Misspecified variable must be included in the data")
  }
  if (!(repeated == FALSE) & !(repeated == TRUE)) {
    stop("Repeated indicator should only be 'TRUE' or 'FALSE'. ")
  }
  if (!is.null(err.mat)) {
    err.mat = as.matrix(err.mat)
    if (!is.numeric(err.mat) | any(err.mat < 0)) {
      stop("Invalide err.mat object, err.mat must be a square symmetric numeric matrix")
    }
    if (nrow(err.mat) != ncol(err.mat)) {
      stop("err.mat must be a square matrix")
    }
    if (length(err.var) != nrow(err.mat)) {
      stop("SIMEXvariable and err.mat have non-conforming size")
    }
    SSigma <- err.mat
    dimnames(SSigma) <- NULL
    if (!isTRUE(all.equal(SSigma, t(SSigma)))) {
      warning("err.mat is numerically not symmetric")
    }
  }


  ###SIMEX parameters
  if (length(B) != 1) {
    stop("B must be positive integer")
  }
  if (!is.numeric(B) | B <= 0) {
    stop("B must be positive integer")
  }
  else {
    B = ceiling(B)
  }
  if (is.null(lambda)) {lambda<-seq(from=0,to=2,length.out=M)}
  if (!is.vector(lambda) | !is.numeric(lambda)) {
    stop(":Invalide lambda object")
  }
  if (any(lambda < 0)) {
    warning("Lambda should be positive values. Negative values will be ignored",
            call. = FALSE)
    lambda <- lambda[lambda >= 0]
  }

  # extrapolation<-match.arg(extrapolation)

  ### Step 1: Simulation step
  temp.Results1<-Getalpha(validationdata,as.Formula(mismodel),mis.var,mis.true,err.var)
  alphahat1<-temp.Results1$alphahat1
  alphahat2<-temp.Results1$alphahat2

  ### estimate the empirical covariance matrix
  if (!repeated){  
    if (missing(err.mat)|is.null(err.mat)){
      if (length(meformula)==0) {
      Models_res<-lapply(1:length(err.var),FUN = function(i){
        model<-lm(validationdata[,err.var[i]]~-1+offset(validationdata[,err.true[i]]),data=data.frame(validationdata))
        return(model$residuals)
      })} else {
        meformula = as.Formula(meformula)
        if (length(meformula)[1]!=length(err.var)) {stop(gettextf("the length of meformula responses (left hand side) is %d and should be equal to the number of error-prone covariates %d",
                                                  length(meformula), length(err.var)), domain = NA)}
      resname.me <- all.vars(meformula)[1:length(meformula)[1]]
      me.mf <- model.frame(meformula, data = validationdata, na.action = na.omit)
      
      index<-match(err.var,resname.me)
      if (length(index)!=length(err.var)) {stop("incorrect measurement error model specified")}
      if (length(meformula)[2]==1) {
          Models_res<-lapply(index,FUN = function(i){
            model<-lm(formula(meformula,lhs=i,rhs=1),data = me.mf)
            return(model$residuals)
          })
        } else {
          Models_res<-lapply(index,FUN = function(i){
            model<-lm(formula(meformula,lhs=i,rhs=i),data = me.mf)
            return(model$residuals)
          })
        }
      }
      Models_res<-matrix(unlist(Models_res),ncol=length(err.var))
      Sigma_ehat<-cov(Models_res)
    } else Sigma_ehat<-cov(err.mat)
  }
  
  ### change the Xstar in mainCovariates into X, Zstar into Z
  changeindex<-match(err.var,colnames(mainCovariates))
  colnames(mainCovariates)[changeindex]<-err.true
  colnames(mainCovariates)[which(colnames(mainCovariates)==mis.var)]<-mis.true

  main.new<-mainCovariates
  Wnames<- setdiff(colnames(main.new),c(val.covariates,unlist(repind)))
  Wmatrix<-main.new[,Wnames]
  NSam<-dim(main.new)[1]
  nbeta<-dim(mainCovariates)[2]
  
  if (dispersionpar) {
    nbeta <- nbeta + 1
    if (is.null(initial)) { initialv <- c(rep(0,nbeta-1),0.25)} else{
      if (length(initial)!=nbeta) {stop(paste0("The length of initial values should be ",nbeta,"."))}
      initialv <- initial 
     }
  } else {
    if (is.null(initial)) {initialv <- rep(0,nbeta)} else {
      if (length(initial)!=nbeta) {stop(paste0("The length of initial values should be ",nbeta,"."))}
      initialv <- initial 
    }
    }
  
  if (repeated) {
      if (options("na.action")=="na.omit") {
        completeindex <- complete.cases(data[,! colname %in% unlist(repind)])
        imputeData <- data[completeindex,] } else imputeData<-data 
  } else imputeData<-mainCovariates

  betahat<-lapply(1:M,FUN=function(i){
    betab_lam<-lapply(1:B,FUN=function(x){
      X.impute<-imputeX(imputeData,err.true,err.var,lambda[i],Sigma_ehat,nsize,repeated,repind)
      main.new[,err.true]<-X.impute
      main.new.df<-data.frame(main.new)

      phat0<-predict(alphahat1,newdata=main.new.df,type="response")
      qhat0<-predict(alphahat2,newdata=main.new.df,type="response")

      DataM <- cbind(X.impute,Wmatrix,main.new[,mis.true])
      DataM0 <- cbind(X.impute,Wmatrix,rep(0,nsize))
      DataM1 <- cbind(X.impute,Wmatrix,rep(1,nsize))

      if (is.null(scorefunction)){
          if (cppmethod) {betasolve<-nleqslv(initialv,scorefun,Y=Response,DataM=DataM,DataM0=DataM0,DataM1=DataM1,phat=phat0,qhat=qhat0,weight=weights,offset=offset)}
          else {betasolve<-nleqslv(initialv,scorefun,Y=Response,DataM=DataM,DataM0=DataM0,DataM1=DataM1,phat=phat0,qhat=qhat0,weight=weights,offset=offset,linkinv=linkinv,var=variance,mueta=mu.eta)}
      } else {betasolve<-nleqslv(initialv,scorefun,Y=Response,DataM=DataM,DataM0=DataM0,DataM1=DataM1,phat=phat0,qhat=qhat0,weight=weights,offset=offset,sfun=sfun)}
      betareturn <- betasolve$x
      
      

      if (any(abs(betareturn)>bound, na.rm = T)) { return(rep(NA,nbeta*2))}

      return(betareturn)
    })
    
    betahat_lam_M<-matrix(unlist(betab_lam),ncol=nbeta,byrow=T)
    betahat_lam<-colMeans(betahat_lam_M[,1:nbeta],na.rm=T)
    return(c(betahat_lam))
  })

  betamatrix<-matrix(unlist(betahat),ncol=nbeta,byrow=T)
  coefs <- extrapolate(extrapolation,betamatrix,lambda,nbeta)
  if (dispersionpar){
    if (extrapolation=="both"){coefs <- coefs[c(1:(nbeta-1),(nbeta+1):(nbeta*2-1))]}
    else {coefs <- coefs[c(1:(nbeta-1))]}
    betamatrix <- betamatrix[,c(1:(nbeta-1))]
  }
  
  ### bootstrap procedure
  betahat_boot_all<-lapply(1:nBoot,FUN=function(t){

    sample.boot<-sample(NSam,replace=T)
    imputeData_boot<-imputeData[sample.boot,]
    main.new.boot<-main.new[sample.boot,]
    Wmatrix_b<-main.new.boot[,Wnames]
    Response_b<-Response[sample.boot]
    
    betahat_boot<-lapply(1:M,FUN=function(i){
      betab_lam<-lapply(1:B,FUN=function(x){
        X.impute<-imputeX(imputeData_boot,err.true,err.var,lambda[i],Sigma_ehat,nsize,repeated,repind)
        options(warn=-1)
        main.new.boot[,err.true]<-X.impute
        main.new.df<-data.frame(main.new.boot)
        
        phat0<-predict(alphahat1,newdata=main.new.df,type="response")
        qhat0<-predict(alphahat2,newdata=main.new.df,type="response")
        
        DataM <- cbind(X.impute,Wmatrix_b,main.new.boot[,mis.true])
        DataM0 <- cbind(X.impute,Wmatrix_b,rep(0,nsize))
        DataM1 <- cbind(X.impute,Wmatrix_b,rep(1,nsize))
        options(warn=0)
        
        
        if (is.null(scorefunction)){
            if (cppmethod) {betasolve<-nleqslv(initialv,scorefun,Y=Response_b,DataM=DataM,DataM0=DataM0,DataM1=DataM1,phat=phat0,qhat=qhat0,weight=weights[sample.boot],offset=offset[sample.boot])}
            else {betasolve<-nleqslv(initialv,scorefun,Y=Response_b,DataM=DataM,DataM0=DataM0,DataM1=DataM1,phat=phat0,qhat=qhat0,weight=weights[sample.boot],offset=offset[sample.boot],linkinv=linkinv,var=variance,mueta=mu.eta)}
        } else {betasolve<-nleqslv(initialv,scorefun,Y=Response_b,DataM=DataM,DataM0=DataM0,DataM1=DataM1,phat=phat0,qhat=qhat0,weight=weights[sample.boot],offset=offset[sample.boot],sfun=sfun)}
        betareturn <- betasolve$x
        
        if (any(abs(betareturn)>bound,na.rm=T)) { return(rep(NA,nbeta*2))}
        
        return(betareturn)
      })
      
      betahat_lam_M<-matrix(unlist(betab_lam),ncol=nbeta,byrow=T)
      betahat_lam<-colMeans(betahat_lam_M[,1:nbeta],na.rm=T)
      return(c(betahat_lam))
    })

    betamatrix_boot<-matrix(unlist(betahat_boot),ncol=nbeta,byrow=T)
    coefs <- tryCatch(extrapolate(extrapolation,betamatrix_boot,lambda,nbeta),error = function(e) {
      if (extrapolation=="both") return(rep(NA,2*nbeta)) else return(rep(NA,nbeta))})
     
    return(coefs)
  })
  
  if (extrapolation=="both"){
    if (dispersionpar){
      betahat_boot_all_M<-matrix(unlist(betahat_boot_all),
                                 ncol=nbeta*2,byrow=T)
      vcov<-apply(betahat_boot_all_M[,c(1:(nbeta-1),(nbeta+1):(nbeta*2-1))],MARGIN = 2,FUN=sd, na.rm=T)
      nbeta <- nbeta -1 
    } else{
      betahat_boot_all_M<-matrix(unlist(betahat_boot_all),ncol=nbeta*2,byrow=T)
      vcov<-apply(betahat_boot_all_M,MARGIN = 2,FUN=sd, na.rm=T)
    }
  } else{
    if (dispersionpar){
      betahat_boot_all_M<-matrix(unlist(betahat_boot_all),ncol=nbeta,byrow=T)
      vcov<-var(betahat_boot_all_M[,c(1:(nbeta-1))]) 
      nbeta <- nbeta -1 
    } else{
      betahat_boot_all_M<-matrix(unlist(betahat_boot_all),ncol=nbeta,byrow=T)
      vcov<-var(betahat_boot_all_M) 
    }
  }
  
  
  ### results wrap-up
  good <- weights > 0
  if (extrapolation=="both") {
    names(coefs)<-names(vcov)<-rep(c(err.true,Wnames,mis.true),2)
  } else {names(coefs)<-names(vcov)<-c(err.true,Wnames,mis.true)}

  if  (extrapolation=="both") { colnames(betamatrix)<-names(coefs)[1:nbeta]} else{
  colnames(betamatrix)<-c(names(coefs))}
  
  ### evaluate the performance from glm theory
  if (is.null(scorefunction)){
    eta <- drop(mainCovariates[,names(coefs)] %*% t(t(coefs)))
    mu <- linkinv(eta <- eta + offset)
    
    varmu <- variance(mu)[good]
    if (anyNA(varmu))
      stop("NAs in V(mu)")
    if (any(varmu == 0))
      stop("0s in V(mu)")
    mu.eta.val <- mu.eta(eta)
    if (any(is.na(mu.eta.val[good])))
      stop("NAs in d(mu)/d(eta)")
    
    residuals <- (Response - mu)/mu.eta(eta)
    dev<-sum(dev.resids(Response, mu, weights))
    wtdmu <- if (intercept) sum(weights * Response)/sum(weights) else linkinv(offset)
    nulldev <- sum(dev.resids(Response, wtdmu, weights))
    ## calculate df
    n.ok <- nsize - sum(weights==0)
    nulldf <- n.ok - as.integer(intercept)
    rank <- length(coefs)
    resdf  <- n.ok - rank
    ## calculate AIC
    aic.model <- aic(Response, 0, mu, weights, dev) + 2*rank
  } else {
    eta <- NA
    mu <- NA
    varmu <- NA
    residuals <- NA
    dev<- NA
    wtdmu <- NA
    nulldev <- NA
    ## calculate df
    n.ok <- nsize - sum(weights==0)
    nulldf <- n.ok - as.integer(intercept)
    rank <- length(coefs)
    resdf  <- n.ok - rank
    ## calculate AIC
    aic.model <- NA
  }
  
  
  ### Move the intercept in the front of parameters
  if ((intercept) && (extrapolation!="both")) {
    intindex <- which(names(coefs)=="(Intercept)")
    otherindex <- 1:nbeta
    otherindex <- otherindex[-intindex]
    coefs <- coefs[c(intindex,otherindex)]
    vcov <- vcov[c(intindex,otherindex),c(intindex,otherindex)]
    betamatrix <- betamatrix[,c(intindex,otherindex)]
  }


  output<-list(coefficients = coefs,
            vcov = vcov, fitted.values = mu,
            call = call, family = family,
            formula = mainformula, mismodel = mismodel, terms = mt,
            err.var = err.var, mis.var = mis.var, err.true = err.true, mis.true = mis.true, err.mat = err.mat,
            M = M, B = B, nBoot = nBoot, extrapolation = extrapolation, weights = weights,
            lambda = lambda, coefmatrix = betamatrix, linear.predictors = eta, deviance = dev, aic = aic.model,
            rank = rank, null.deviance = nulldev, df.residual = resdf, df.null = nulldf, residuals = residuals, 
            qr = qr, x = mainCovariates, y = Response, model = mf, contrasts = contrasts, xlevels = xlevels)

  class(output)<- "augSIMEX"

  return(output)

}
Any scripts or data that you put into this service are public.
augSIMEX documentation built on April 23, 2020, 5:08 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
augSIMEX
Analysis of Data with Mixed Measurement Error and Misclassification in Covariates

R/augSIMEX.R
In augSIMEX: Analysis of Data with Mixed Measurement Error and Misclassification in Covariates

Defines functions augSIMEX

Documented in augSIMEX

Try the augSIMEX package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

augSIMEX Analysis of Data with Mixed Measurement Error and Misclassification in Covariates

R/augSIMEX.R In augSIMEX: Analysis of Data with Mixed Measurement Error and Misclassification in Covariates

Defines functions augSIMEX

Documented in augSIMEX

Try the augSIMEX package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

augSIMEX
Analysis of Data with Mixed Measurement Error and Misclassification in Covariates

R/augSIMEX.R
In augSIMEX: Analysis of Data with Mixed Measurement Error and Misclassification in Covariates