imi: Implemenets Iterative Multiple Imputation

imi.glm <- function(data.miss,family=binomial(link='logit'),M0=5,max.M=500,epsilon,method='pmm',resp,regressors,
                    conv.plot=TRUE,
                    dis.method='mahalanobis',mah.scale='combined',successive.valid=3,max.iter.glm=1000,
                    print.progress=TRUE){
  # possible values for method:
  # all possible things in MICE,
  # in case mvn is chosen, then it switchs to amelia.
  # possibilities for dis.method and mah.scale
  # possible methods for distance
  # dis.method='euclidean'
  # dis.method='inf.norm'
  # dis.method='mahalanobis'
  # # possible methods for scale in Mahalanobis
  # mah.scale='within'
  # mah.scale='between'
  # mah.scale='combined'
  # successive.valid: length of distances successively smaller than epsilon to stop, minimum is 1
  if (is.character(successive.valid)==FALSE){
    if (successive.valid<1 | floor(successive.valid)!=successive.valid){
      stop('Please enter a postive integer successive.valid')
    }
  }
  if (is.character(M0)==FALSE ){
    if (M0<2){
      stop('Please select an initial number of imputations larger than 1')
    }
  }
  if (is.character(M0)==FALSE ){
    if (floor(M0)!=M0){
      stop('Please select an integer initial number of imputations')
    }
  }
  ## nothing is manual
  if (M0!='manual' & successive.valid!='manual'){
    if (print.progress==TRUE){
      cat('We are working on the initial imputation.',fill = TRUE)
    }
    if (method=='mvn'){
      require(Amelia)
      data.imp0=amelia(data.miss,m=M0,p2s =0)$imputations
    }
    if (method!='mvn' & method!='auto'){
      require(mice)
      data.imp0_ini=mice(data.miss,m=M0,method=method,print=FALSE)
      data.imp0=lapply(seq(M0), function(i) complete(data.imp0_ini, action = i))
    }
    if (method=='auto'){
      require(mice)
      data.imp0_ini=mice(data.miss,m=M0,print=FALSE)
      data.imp0=lapply(seq(M0), function(i) complete(data.imp0_ini, action = i))
    }

    # Now fitting the modle on these
    # first creating the modle formula
    model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
    # fit the model to the first one to find the length of parameters
    model.fit=glm(model.expression,family=family,data=data.imp0[[1]],control = list(maxit = max.iter.glm))
    param.est1=model.fit$coefficients
    param.cov1=vcov(model.fit)
    param.est=matrix(0,M0,length(param.est1))
    param.cov=NULL
    param.est[1,]=param.est1
    param.cov[[1]]=param.cov1
    # now find the estimate for the rest
    for (i in 2:M0){
      model.fit=glm(model.expression,family=family,data=data.imp0[[i]],control = list(maxit = max.iter.glm))
      param.est[i,]=model.fit$coefficients
      param.cov[[i]]=vcov(model.fit)
    }
    comb.mi0=combine.mi(param.est,param.cov)
    data.imp=data.imp0
    # Now we should add imputations till we reach that point
    # variable to define
    #epsilon=0.05

    M=M0
  }
  ## interactive part: both manual
  if (M0=='manual' & successive.valid=='manual'){
    t1=proc.time()
    if (method=='mvn'){
      require(Amelia)
      data.imp0=amelia(data.miss,m=2,p2s =0)$imputations
    }
    if (method!='mvn' & method!='auto'){
      require(mice)
      data.imp0_ini=mice(data.miss,m=2,method=method,print=FALSE)
      data.imp0=lapply(seq(2), function(i) complete(data.imp0_ini, action = i))
    }
    if (method=='auto'){
      require(mice)
      data.imp0_ini=mice(data.miss,m=2,print=FALSE)
      data.imp0=lapply(seq(2), function(i) complete(data.imp0_ini, action = i))
    }

    # Now fitting the modle on these
    # first creating the modle formula
    model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
    # fit the model to the first one to find the length of parameters
    model.fit=glm(model.expression,family=family,data=data.imp0[[1]],control = list(maxit = max.iter.glm))
    param.est1=model.fit$coefficients
    param.cov1=vcov(model.fit)
    param.est=matrix(0,2,length(param.est1))
    param.cov=NULL
    param.est[1,]=param.est1
    param.cov[[1]]=param.cov1
    model.fit=glm(model.expression,family=family,data=data.imp0[[2]],control = list(maxit = max.iter.glm))
    param.est[2,]=model.fit$coefficients
    param.cov[[2]]=vcov(model.fit)
    comb.mi0=combine.mi(param.est,param.cov)
    data.imp=data.imp0
    M=length(data.imp)
    t2=proc.time()


    if ((t2-t1)[3]<0.001){
      print('The time it takes (in seconds) to imput the data two times and fit the model to them is less than 0.001 seconds')

    }
    if ((t2-t1)[3]>=0.001){
      print(paste('The time it takes (in seconds) to imput the data two times and fit the model to them is:',round((t2-t1)[3],3)))
    }
    M0.ini=readline('What is your choice of initial number of imputations?')
    successive.valid.ini=readline('What is your choice for successive steps validation?')


    if (print.progress==TRUE){
      cat('We are working on the initial imputation.',fill = TRUE)
    }
    M0 = as.numeric(unlist(strsplit(M0.ini, ",")))-2
    successive.valid=as.numeric(unlist(strsplit(successive.valid.ini, ",")))
    if (successive.valid<1 | floor(successive.valid)!=successive.valid){
      stop('Please enter a postive integer successive.valid')
    }
    data.imp=data.imp0
    if (floor(M0+1)!=M0+1){
      stop('Please select an integer initial number of imputations')
    }
    if ((M0+2)<2){
      stop('Please select an initial number of imputations larger than 1')
    }
    if (M0>0){
      if (method=='mvn'){
        require(Amelia)
        data.imp0=amelia(data.miss,m=M0,p2s =0)$imputations
      }
      if (method!='mvn' & method!='auto'){
        require(mice)
        data.imp0_ini=mice(data.miss,m=M0,method=method,print=FALSE)
        data.imp0=lapply(seq(M0), function(i) complete(data.imp0_ini, action = i))
      }
      if (method=='auto'){
        require(mice)
        data.imp0_ini=mice(data.miss,m=M0,print=FALSE)
        data.imp0=lapply(seq(M0), function(i) complete(data.imp0_ini, action = i))
      }

      # Now fitting the modle on these
      # first creating the modle formula
      #model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
      # fit the model to the first one to find the length of parameters
      model.fit=glm(model.expression,family=family,data=data.imp0[[1]],control = list(maxit = max.iter.glm))
      param.est1=model.fit$coefficients
      param.cov1=vcov(model.fit)
      #param.est=matrix(0,M0,length(param.est1))
      #param.cov=NULL
      param.est=rbind(param.est,param.est1)
      param.cov[[1+2]]=param.cov1
      # now find the estimate for the rest
      if (M0>1){
        for (i in 2:M0){
          model.fit=glm(model.expression,family=family,data=data.imp0[[i]],control = list(maxit = max.iter.glm))
          param.est=rbind(param.est,model.fit$coefficients)
          param.cov[[i+2]]=vcov(model.fit)
        }
      }

      comb.mi0=combine.mi(param.est,param.cov)
      data.imp[3:(M0+2)]=data.imp0
      # Now we should add imputations till we reach that point
      # variable to define
      #epsilon=0.05

      M=length(data.imp)
    }

  }
  ## interactive part: M0 manual
  if (M0=='manual' & successive.valid!='manual'){
    t1=proc.time()
    if (method=='mvn'){
      require(Amelia)
      data.imp0=amelia(data.miss,m=2,p2s =0)$imputations
    }
    if (method!='mvn' & method!='auto'){
      require(mice)
      data.imp0_ini=mice(data.miss,m=2,method=method,print=FALSE)
      data.imp0=lapply(seq(2), function(i) complete(data.imp0_ini, action = i))
    }
    if (method=='auto'){
      require(mice)
      data.imp0_ini=mice(data.miss,m=2,print=FALSE)
      data.imp0=lapply(seq(2), function(i) complete(data.imp0_ini, action = i))
    }

    # Now fitting the modle on these
    # first creating the modle formula
    model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
    # fit the model to the first one to find the length of parameters
    model.fit=glm(model.expression,family=family,data=data.imp0[[1]],control = list(maxit = max.iter.glm))
    param.est1=model.fit$coefficients
    param.cov1=vcov(model.fit)
    param.est=matrix(0,2,length(param.est1))
    param.cov=NULL
    param.est[1,]=param.est1
    param.cov[[1]]=param.cov1
    model.fit=glm(model.expression,family=family,data=data.imp0[[2]],control = list(maxit = max.iter.glm))
    param.est[2,]=model.fit$coefficients
    param.cov[[2]]=vcov(model.fit)
    comb.mi0=combine.mi(param.est,param.cov)
    data.imp=data.imp0
    M=length(data.imp)
    t2=proc.time()

    if ((t2-t1)[3]<0.001){
      print('The time it takes (in seconds) to imput the data two times and fit the model to them is less than 0.001 seconds')

    }
    if ((t2-t1)[3]>=0.001){
      print(paste('The time it takes (in seconds) to imput the data two times and fit the model to them is:',round((t2-t1)[3],3)))
    }
    M0.ini=readline('What is your choice of initial number of imputations?')

    if (print.progress==TRUE){
      cat('We are working on the initial imputation.',fill = TRUE)
    }
    M0 = as.numeric(unlist(strsplit(M0.ini, ",")))-2
    data.imp=data.imp0
    if (floor(M0+1)!=M0+1){
      stop('Please select an integer initial number of imputations')
    }
    if ((M0+2)<2){
      stop('Please select an initial number of imputations larger than 1')
    }
    if (M0>0){
      if (method=='mvn'){
        require(Amelia)
        data.imp0=amelia(data.miss,m=M0,p2s =0)$imputations
      }
      if (method!='mvn' & method!='auto'){
        require(mice)
        data.imp0_ini=mice(data.miss,m=M0,method=method,print=FALSE)
        data.imp0=lapply(seq(M0), function(i) complete(data.imp0_ini, action = i))
      }
      if (method=='auto'){
        require(mice)
        data.imp0_ini=mice(data.miss,m=M0,print=FALSE)
        data.imp0=lapply(seq(M0), function(i) complete(data.imp0_ini, action = i))
      }

      # Now fitting the modle on these
      # first creating the modle formula
      #model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
      # fit the model to the first one to find the length of parameters
      model.fit=glm(model.expression,family=family,data=data.imp0[[1]],control = list(maxit = max.iter.glm))
      param.est1=model.fit$coefficients
      param.cov1=vcov(model.fit)
      #param.est=matrix(0,M0,length(param.est1))
      #param.cov=NULL
      param.est=rbind(param.est,param.est1)
      param.cov[[1+2]]=param.cov1
      # now find the estimate for the rest
      if (M0>1){
        for (i in 2:M0){
          model.fit=glm(model.expression,family=family,data=data.imp0[[i]],control = list(maxit = max.iter.glm))
          param.est=rbind(param.est,model.fit$coefficients)
          param.cov[[i+2]]=vcov(model.fit)
        }
      }

      comb.mi0=combine.mi(param.est,param.cov)
      data.imp[3:(M0+2)]=data.imp0
      # Now we should add imputations till we reach that point
      # variable to define
      #epsilon=0.05

      M=length(data.imp)
    }

  }
  ## interactive part: successive.valid is manual
  if (M0!='manual' & successive.valid=='manual'){
    t1=proc.time()
    if (method=='mvn'){
      require(Amelia)
      data.imp0=amelia(data.miss,m=2,p2s =0)$imputations
    }
    if (method!='mvn' & method!='auto'){
      require(mice)
      data.imp0_ini=mice(data.miss,m=2,method=method,print=FALSE)
      data.imp0=lapply(seq(2), function(i) complete(data.imp0_ini, action = i))
    }
    if (method=='auto'){
      require(mice)
      data.imp0_ini=mice(data.miss,m=2,print=FALSE)
      data.imp0=lapply(seq(2), function(i) complete(data.imp0_ini, action = i))
    }

    # Now fitting the modle on these
    # first creating the modle formula
    model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
    # fit the model to the first one to find the length of parameters
    model.fit=glm(model.expression,family=family,data=data.imp0[[1]],control = list(maxit = max.iter.glm))
    param.est1=model.fit$coefficients
    param.cov1=vcov(model.fit)
    param.est=matrix(0,2,length(param.est1))
    param.cov=NULL
    param.est[1,]=param.est1
    param.cov[[1]]=param.cov1
    model.fit=glm(model.expression,family=family,data=data.imp0[[2]],control = list(maxit = max.iter.glm))
    param.est[2,]=model.fit$coefficients
    param.cov[[2]]=vcov(model.fit)
    comb.mi0=combine.mi(param.est,param.cov)
    data.imp=data.imp0
    M=length(data.imp)
    t2=proc.time()

    if ((t2-t1)[3]<0.001){
      print('The time it takes (in seconds) to imput the data two times and fit the model to them is less than 0.001 seconds')

    }
    if ((t2-t1)[3]>=0.001){
      print(paste('The time it takes (in seconds) to imput the data two times and fit the model to them is:',round((t2-t1)[3],3)))
    }
    successive.valid.ini=readline('What is your choice for successive steps validation?')

    if (print.progress==TRUE){
      cat('We are working on the initial imputation.',fill = TRUE)
    }
    successive.valid=as.numeric(unlist(strsplit(successive.valid.ini, ",")))
    if (successive.valid<1 | floor(successive.valid)!=successive.valid){
      stop('Please enter a postive integer successive.valid')
    }
    data.imp=data.imp0
    if (floor(M0+1)!=M0+1){
      stop('Please select an integer initial number of imputations')
    }
    if ((M0+2)<2){
      stop('Please select an initial number of imputations larger than 1')
    }
    if (M0>0){
      if (method=='mvn'){
        require(Amelia)
        data.imp0=amelia(data.miss,m=M0,p2s =0)$imputations
      }
      if (method!='mvn' & method!='auto'){
        require(mice)
        data.imp0_ini=mice(data.miss,m=M0,method=method,print=FALSE)
        data.imp0=lapply(seq(M0), function(i) complete(data.imp0_ini, action = i))
      }
      if (method=='auto'){
        require(mice)
        data.imp0_ini=mice(data.miss,m=M0,print=FALSE)
        data.imp0=lapply(seq(M0), function(i) complete(data.imp0_ini, action = i))
      }

      # Now fitting the modle on these
      # first creating the modle formula
      #model.expression<-as.formula(paste(paste(resp,"~"),paste(regressors,collapse="+")))
      # fit the model to the first one to find the length of parameters
      model.fit=glm(model.expression,family=family,data=data.imp0[[1]],control = list(maxit = max.iter.glm))
      param.est1=model.fit$coefficients
      param.cov1=vcov(model.fit)
      #param.est=matrix(0,M0,length(param.est1))
      #param.cov=NULL
      param.est=rbind(param.est,param.est1)
      param.cov[[1+2]]=param.cov1
      # now find the estimate for the rest
      if (M0>1){
        for (i in 2:M0){
          model.fit=glm(model.expression,family=family,data=data.imp0[[i]],control = list(maxit = max.iter.glm))
          param.est=rbind(param.est,model.fit$coefficients)
          param.cov[[i+2]]=vcov(model.fit)
        }
      }

      comb.mi0=combine.mi(param.est,param.cov)
      data.imp[3:(M0+2)]=data.imp0
      # Now we should add imputations till we reach that point
      # variable to define
      #epsilon=0.05

      M=length(data.imp)
    }

  }






  # Doing the rest of initial imputations

  #max.M=500
  dis.extra=epsilon+1
  dis.all=0
  while(sum(dis.extra>epsilon)>0 & M<max.M){
    M=M+1
    if (print.progress=='TRUE'){
      cat(paste('We are working on imputed dataset number ',M),fill = TRUE)
    }

    if (method=='mvn'){
      data.imp1=amelia(data.miss,m=1,p2s =0)$imputations$imp1
      data.imp[[M]]=data.imp1
    }

    if (method!='mvn' & method!='auto'){
      data.imp1_ini=mice(data.miss,m=1,method=method,print=FALSE)
      data.imp1=complete(data.imp1_ini, action = 1)
      data.imp[[M]]=data.imp1
    }
    if (method=='auto'){
      data.imp1_ini=mice(data.miss,m=1,print=FALSE)
      data.imp1=complete(data.imp1_ini, action = 1)
      data.imp[[M]]=data.imp1
    }

    # now fit the model to the new imputed data
    model.fit=glm(model.expression,family=family,data=data.imp1,control = list(maxit = max.iter.glm))
    param.est=rbind(param.est,model.fit$coefficients)
    param.cov[[M]]=vcov(model.fit)
    comb.mi1=combine.mi(param.est,param.cov)
    # now compute the distance between those two based on the specified method
    diff.est=comb.mi1$param.est-comb.mi0$param.est
    if (dis.method=='euclidean'){
      dis=sqrt(t(diff.est)%*%diff.est)
    }
    if (dis.method=='inf.norm'){
      dis=max(abs(diff.est))
    }
    if (dis.method=='mahalanobis'){
      # to compute generalized inverse
      require('MASS')
      if (mah.scale=='within'){
        #S.mah=comb.mi0$within.cov + comb.mi1$within.cov
        S.mah= comb.mi1$within.cov

      }
      if (mah.scale=='between'){
        #S.mah=comb.mi0$between.cov + comb.mi1$between.cov
        S.mah= comb.mi1$between.cov

      }
      if (mah.scale=='combined'){
        #S.mah=comb.mi0$param.cov + comb.mi1$param.cov
        S.mah=comb.mi1$param.cov
      }
      dis=sqrt((t(diff.est)%*%ginv(S.mah))%*%diff.est)
    }
    comb.mi0=comb.mi1
    dis.all=c(dis.all,dis)
    dis.extra=tail(dis.all,n=successive.valid)
  }
  dis.all=dis.all[-1]
  if (conv.plot==TRUE){
    plot(dis.all,xlab='Number of imputations',ylab='Distance')
  }
  conv.status=0
  if (sum(dis.extra<epsilon)>0){
    conv.status=1
  }
  if (print.progress==TRUE){
    if (conv.status==1){
      cat(paste('We are done! The convergence is acheived and the sufficient number of imputations is ',M),
          fill = TRUE)
    }
    if (conv.status==0){
      cat('The convergence could not be achieved, please increase max.M or deacrese espsilon or
          number of successive validations steps.',
          fill = TRUE)
    }
  }
  return(list(mi.param=comb.mi1,data.imp=data.imp,dis.steps=dis.all,conv.status=conv.status,M=M))}