# R/pmvd.R
# In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures
#
# Documented in: plot.pmvd, pmvd, print.pmvd

############################################
# Needed packages
#library(parallel)
#library(gtools)
#library(boot)

############################################
#R2 estimation
estim.R2<-function(dataX,y, subset, logistic, any.cat, max.iter){
  # Function estim.R2:
  #   Computes the R2 (explained variance) statistic of a linear or logistic
  #   regression restricted to a sub-model (subset of the inputs).
  # Inputs:
  #   dataX: data.frame/matrix of inputs, one column per input.
  #   y: num/factor/logical vector (or one-column matrix), the response.
  #   subset: integer vector, column indices of dataX kept in the sub-model.
  #   logistic: logical. If TRUE, a binomial glm is fitted, otherwise a linear
  #             model.
  #   any.cat: logical. If TRUE, at least one column of dataX is a factor and
  #            the linear model is fitted with lm() instead of by hand.
  #   max.iter: int. Maximum number of glm iterations (glm.control(maxit=)).
  #             Only evaluated when logistic is TRUE.
  # Output:
  #   R2: num scalar. R2 value for the desired model (linear/logistic) and for
  #       the specified subset of inputs. For the logistic model this is
  #       1 - deviance/null.deviance.

  cols <- as.vector(subset)

  if (!logistic) {
    #######################
    #Linear Model

    if (!any.cat) {
      ################################################################
      # Computing by hand with scalar inputs (much faster than lm)

      # Design matrix: intercept column followed by the selected inputs.
      # drop = FALSE keeps a single selected column as a one-column matrix.
      X_ <- cbind(1, as.matrix(dataX[, cols, drop = FALSE]))

      # Least-squares estimate obtained by solving the normal equations
      # directly rather than forming the explicit inverse of t(X_) %*% X_.
      estim.beta <- solve(crossprod(X_), crossprod(X_, y))

      # R2 = 1 - Var(residuals)/Var(y). var() of a one-column matrix is a
      # 1x1 matrix, so both terms are reduced to plain scalars to match the
      # scalar returned by the lm/glm branches.
      R2 <- 1 - as.numeric(var(y - X_ %*% estim.beta)) / as.numeric(var(y))
    } else {
      #################################################################
      # Using lm if there are any categorical inputs

      X_ <- dataX[, cols]
      dat <- data.frame(y, X_)

      R2 <- summary(lm(y ~ ., data = dat))$r.squared
    }
  } else {
    #######################
    #Logistic Model

    # Maximum number of optimization iterations
    fit.control <- glm.control(maxit = max.iter)

    X_ <- dataX[, cols]
    dat <- data.frame(y, X_)
    sum_glm <- summary(glm(y ~ ., data = dat,
                           family = "binomial",
                           control = fit.control))

    # Deviance-based logistic R2
    R2 <- 1 - (sum_glm$deviance / sum_glm$null.deviance)
  }
  return(R2)
}

######################################
#Recursive PMVD computation
calc.pmvd.rec.boot<-function(X, y, indices, logistic, max.iter,tol, any.cat, i=1:nrow(X)){
  # Function calc.pmvd.rec.boot
  #   Computes the PMVD indices using the recursive algorithm of Feldman (2005)
  #   on the rows of (X, y) selected by i. Designed to be used as the
  #   'statistic' of a bootstrap (index-based resampling).
  # Inputs:
  #   X: matrix or data frame of inputs.
  #   y: num/factor/logical vector.
  #   indices: list. indices[[k+1]] is a (k x nb_subsets) matrix whose columns
  #            are the subsets of inputs of order k.
  #   logistic: logical. If TRUE, logistic regression is performed, else, linear
  #             model instead.
  #   max.iter: int. Maximum number of glm iterations, forwarded to estim.R2.
  #   tol: NULL or num. If NULL, a covariate is treated as spurious when its
  #        w({i}) is exactly 0; otherwise when |w({i})| < tol.
  #   any.cat: logical. If TRUE, then at least one column of X is a factor.
  #   i: integer vector of row indices. Used for bootstrap estimation.
  # Output:
  #   res: (1xd) matrix. PMVD value for all covariates (0 for the covariates
  #        detected as spurious).

  #################################
  # Pre-Processing
  #Dimension
  d <- ncol(X)
  #Bootstrap row selection (drop = FALSE keeps X two-dimensional)
  X <- X[i, , drop = FALSE]
  y <- matrix(y, ncol = 1)[i, ]

  #################################
  # Conditional elements estimation
  # R2s[[k+1]] holds the R2 of every sub-model of order k; R2s[[1]] stays 0
  # (empty model).
  R2s <- rep(list(0), d + 1)
  for (j in seq_len(d)) {
    R2s[[j + 1]] <- apply(indices[[j + 1]], 2, estim.R2,
                          dataX = X,
                          y = y,
                          logistic = logistic,
                          any.cat = any.cat,
                          max.iter = max.iter)
  }

  #################################
  # PMVD indices computation

  #Full model indices
  N <- seq_len(d)
  #Initiating P(S)
  Ps <- rep(list(0), d + 1)
  #Computing w({i}) forall i: R2(full) - R2(full without i). rev() puts the
  #values in covariate order, since the order-(d-1) subsets are enumerated
  #with the last covariate missing first.
  Ps[[1]] <- rev(R2s[[d + 1]] - R2s[[d]])

  #Finding if any w({i})=0 or |w({i})|<tol
  if (is.null(tol)) {
    logi.zeros <- Ps[[1]] == 0
  } else {
    logi.zeros <- abs(Ps[[1]]) < tol
  }
  has_null <- any(logi.zeros)

  p <- d #Original number of dimensions
  if (has_null) {
    var_null <- which(logi.zeros)
    d <- p - length(var_null) #Number of dimensions after removing spurious covariates
    idx_ind_null <- rep(list(0), d + 1)
    indices_ <- rep(list(0), d)
    # seq_len() so that nothing is iterated when every covariate is spurious
    for (k in seq_len(d)) {
      checkmat <- matrix(is.element(indices[[k + 1]], var_null), k, ncol(indices[[k + 1]]))
      #Columns (subsets of order k) containing no spurious covariate
      idx_ind_null[[k]] <- as.vector(which(colSums(checkmat) == 0))
      indices_[[k]] <- matrix(indices[[k + 1]][, idx_ind_null[[k]]], ncol = length(idx_ind_null[[k]]))
    }
    Ps[[1]] <- Ps[[1]][setdiff(N, var_null)] #Only keeping w({i}) for non-spurious covariates
    sets_by_order <- indices_
  } else {
    # sets_by_order[[k]]: subsets of order k
    sets_by_order <- indices[-1]
  }
  #Value of R2(full_model) to avoid a lot of search
  R2_comp <- R2s[[p + 1]]

  if (d == 0) {
    # Every covariate is spurious: nothing to share out
    vals <- numeric(0)
  } else if (d == 1) {
    # A single (non-spurious) covariate receives the whole R2. This is the
    # last-order formula below with P(empty set) = 1; 2:d would otherwise
    # count backwards and index R2s[[0]].
    vals <- R2_comp
  } else {
    for (ord in 2:d) {
      #For every input orders : we start at 2 because Ps[[1]] has already been computed
      if (ord == d) {
        #If this is the last order
        Ps[[ord]] <- R2_comp * (1 / sum(1 / Ps[[ord - 1]]))
      } else {
        #R2 of the complements (order d-ord), restricted to the subsets free
        #of spurious covariates when there are some
        if (has_null) {
          R2_sub <- R2s[[d + 1 - ord]][idx_ind_null[[d - ord]]]
        } else {
          R2_sub <- R2s[[d + 1 - ord]]
        }
        #Computing w(S) for all S of order ord
        Ws <- rev(R2_comp - R2_sub)
        cur_sets <- sets_by_order[[ord]]
        prev_sets <- sets_by_order[[ord - 1]]
        prev_P <- Ps[[ord - 1]]
        nbset <- ncol(cur_sets) #Number of subsets S
        Ps[[ord]] <- rep(0, nbset) #Setting up the Ps results
        for (s in seq_len(nbset)) {
          S <- c(cur_sets[, s]) #For every possible subset S of order ord
          #Find every S\{i} for all i: columns of prev_sets fully included in S
          idx_Spi <- which(colSums(matrix(prev_sets %in% S,
                                          nrow = length(S) - 1,
                                          ncol = ncol(prev_sets))) == length(S) - 1)
          Ps[[ord]][s] <- Ws[s] / sum(1 / (prev_P[idx_Spi])) #Recursively compute Ps
        }
      }
    }
    vals <- rev(Ps[[d]] / Ps[[d - 1]])
  }

  if (has_null) {
    #If spurious variable, we set their value to 0
    pmd_ <- rep(0, p)
    pmd_[setdiff(N, var_null)] <- as.vector(vals)
    vals <- pmd_
  }
  pmd <- matrix(vals, nrow = 1)
  colnames(pmd) <- colnames(X)
  return(pmd)
}


############################################
# Main function
pmvd <- function(X, y, logistic = FALSE,  tol=NULL, rank=FALSE, nboot = 0, conf=0.95,  max.iter=1000, parl=NULL){
  # Function pmvd
  #   Computes the PMVD decomposition of the R2 of a linear or logistic
  #   regression of y on the columns of X, optionally with bootstrap
  #   confidence intervals and parallel computation.
  # Inputs:
  #   X: matrix or data frame of inputs (one column per input).
  #   y: numeric vector (linear model), or logical/factor/numeric vector
  #      (logistic model), with one value per row of X.
  #   logistic: logical. If TRUE, logistic regression is performed.
  #   tol: NULL or num. If NULL, a covariate is treated as spurious when its
  #        w({i}) is exactly 0; otherwise when |w({i})| < tol.
  #   rank: logical. If TRUE, the columns of X are rank-transformed (not
  #         available with logistic regression or categorical inputs).
  #   nboot: non-negative integer. Number of bootstrap replicates; 0 disables
  #          the bootstrap.
  #   conf: num. Forwarded to bootstats() together with the "basic" type
  #         (presumably the confidence level -- see bootstats).
  #   max.iter: int. Maximum number of glm iterations for logistic regression.
  #   parl: NULL or positive integer. Number of cores of the cluster used for
  #         parallel computation; NULL means sequential computation.
  # Output:
  #   A list of class "pmvd" with elements call, pmvd, R2s, indices, P,
  #   conf_int, X, y, logistic, boot, nboot, rank, parl and conf.

  #Recorded up front, before any argument is modified
  fn_call <- match.call()

  ##########################################
  #Arguments check

  #X (checked first: everything below relies on X being two-dimensional)
  if(!(class(X)[1] %in% c("matrix", "data.frame"))){
    stop("X must be either a matrix of a data frame.")
  }

  ###########################################
  #Pre-processing

  #Retrieving dimension
  d <- ncol(X)

  #Checking for categorical inputs (only a data frame can hold factor columns)
  any.cat <- is.data.frame(X) && any(vapply(X, is.factor, logical(1)))

  #logistic
  if(!is.logical(logistic) || length(logistic) != 1 || is.na(logistic)){
    stop("The 'logistic' argument must be logical, either TRUE or FALSE.")
  }

  #y
  if (!logistic && !is.numeric(y)){
    stop(paste0("y must be a numeric vector of length ",nrow(X)," or a matrix with ", nrow(X), " rows and one column."))
  }else if(logistic && all(!is.logical(y), !is.factor(y), !is.numeric(y))){
    stop("y must be a logical, factor or numeric vector.")
  }else if (length(y) != nrow(X)){
    stop(paste0("y must be a vector of length ",nrow(X)," or a matrix with ", nrow(X), " rows and one column."))
  }

  #rank
  if(!is.logical(rank) || length(rank) != 1 || is.na(rank)){
    stop("The 'rank' argument must be logical, either TRUE or FALSE.")
  }else if (logistic && rank){
    rank <- FALSE
    warning("Impossible to perform a logistic regression with a rank transformation. Defaulted to rank=FALSE.")
  }else if (any.cat && rank){
    rank <- FALSE
    warning("Impossible to perform a rank transformation with categorical inputs. Defaulted to rank=FALSE.")
  }

  #nboot
  if(!is.numeric(nboot) || length(nboot) != 1 || is.na(nboot) ||
     nboot %% 1 != 0 || nboot < 0){
    stop("The 'nboot' argument must be a positive integer.")
  }
  boot <- nboot > 0

  #parl
  if(!is.null(parl)){
    # parl = 0 is rejected here: a cluster cannot be created with 0 workers
    if(!is.numeric(parl) || length(parl) != 1 || is.na(parl) ||
       parl %% 1 != 0 || parl < 1){
      stop("The 'parl' argument must be NULL or a positive integer.")
    }
    n_cores <- parallel::detectCores() #May be NA on some platforms
    if(!is.na(n_cores) && parl > n_cores){
      parl <- max(1, n_cores - 1) #Never fall back to 0 cores
      warning(paste0("Too many cores specified. Defaulted to ", parl, " cores."))
    }
  }

  ########################################
  #Data Processing

  #Rank Transformation
  if(rank){
    # base::rank is spelled out because 'rank' is also the logical argument
    X <- apply(X, 2, base::rank)
  }

  #List of subsets by order ([[k+1]]: subsets of order k, one per column)
  indices <- rep(list(0), d + 1)
  #List of R2s, arranged by orders (same structure as the indices object)
  R2s <- rep(list(0), d + 1)

  #Computing subsets
  for(j in seq_len(d)){
    indices[[j + 1]] <- t(gtools::combinations(n = d, r = j))
  }

  #####################################################
  # PMVD Computing

  #Preparing parallel cluster
  if(!is.null(parl)){
    cl <- parallel::makeCluster(parl)
    # Stop the cluster however the function exits, including on error
    on.exit(parallel::stopCluster(cl), add = TRUE)
  }

  ############################################
  #Conditional elements computing
  #For every order, the corresponding R2 estimate of each
  #subset of indices are stored in R2s
  for(j in seq_len(d)){
    if(!is.null(parl)){
      #Parallelized R2 estimation
      R2s[[j + 1]] <- parallel::parApply(cl, indices[[j + 1]], 2, estim.R2,
                                         dataX = X,
                                         y = y,
                                         logistic = logistic,
                                         any.cat = any.cat,
                                         max.iter = max.iter)
    }else{
      #R2 estimation
      R2s[[j + 1]] <- apply(indices[[j + 1]], 2, estim.R2,
                            dataX = X,
                            y = y,
                            logistic = logistic,
                            any.cat = any.cat,
                            max.iter = max.iter)
    }
  }

  if(!boot){
    #################################
    # Non-Bootstrap computation

    #Full model indices
    N <- seq_len(d)
    #Initiating P(S)
    Ps <- rep(list(0), d + 1)
    #Computing w({i}) forall i: R2(full) - R2(full without i), put back in
    #covariate order by rev()
    Ps[[1]] <- rev(R2s[[d + 1]] - R2s[[d]])

    #Finding if any w({i})=0 or |w({i})|<tol
    if(is.null(tol)){
      logi.zeros <- Ps[[1]] == 0
    }else{
      logi.zeros <- abs(Ps[[1]]) < tol
    }
    has_null <- any(logi.zeros)

    p <- d #Original number of dimensions
    if(has_null){
      var_null <- which(logi.zeros)
      d <- p - length(var_null) #Number of dimensions after removing spurious covariates
      idx_ind_null <- rep(list(0), d + 1)
      indices_ <- rep(list(0), d)
      # seq_len() so that nothing is iterated when every covariate is spurious
      for(k in seq_len(d)){
        checkmat <- matrix(is.element(indices[[k + 1]], var_null), k, ncol(indices[[k + 1]]))
        #Columns (subsets of order k) containing no spurious covariate
        idx_ind_null[[k]] <- as.vector(which(colSums(checkmat) == 0))
        indices_[[k]] <- matrix(indices[[k + 1]][, idx_ind_null[[k]]], ncol = length(idx_ind_null[[k]]))
      }
      Ps[[1]] <- Ps[[1]][setdiff(N, var_null)] #Only keeping w({i}) for non-spurious covariates
      sets_by_order <- indices_
    }else{
      # sets_by_order[[k]]: subsets of order k
      sets_by_order <- indices[-1]
    }
    #Value of R2(full_model) to avoid a lot of search
    R2_comp <- R2s[[p + 1]]

    if(!is.null(parl)){doParallel::registerDoParallel(cl)}

    if(d == 0){
      # Every covariate is spurious: nothing to share out
      vals <- numeric(0)
    }else if(d == 1){
      # A single (non-spurious) covariate receives the whole R2. This is the
      # last-order formula below with P(empty set) = 1; 2:d would otherwise
      # count backwards and index R2s[[0]].
      vals <- R2_comp
    }else{
      for(ord in 2:d){
        #For every input orders : we start at 2 because Ps[[1]] has already been computed
        if(ord == d){
          #If this is the last order
          Ps[[ord]] <- R2_comp * (1 / sum(1 / Ps[[ord - 1]]))
        }else{
          #R2 of the complements (order d-ord), restricted to the subsets
          #free of spurious covariates when there are some
          if(has_null){
            R2_sub <- R2s[[d + 1 - ord]][idx_ind_null[[d - ord]]]
          }else{
            R2_sub <- R2s[[d + 1 - ord]]
          }
          #Computing w(S) for all S of order ord
          Ws <- rev(R2_comp - R2_sub)
          cur_sets <- sets_by_order[[ord]]
          prev_sets <- sets_by_order[[ord - 1]]
          prev_P <- Ps[[ord - 1]]
          nbset <- ncol(cur_sets) #Number of subsets S

          if(!is.null(parl)){
            Ps[[ord]] <- foreach::foreach(s = seq_len(nbset), .combine = cbind) %dopar% {
              S <- c(cur_sets[, s])
              idx_Spi <- which(colSums(matrix(prev_sets %in% S,
                                              nrow = length(S) - 1,
                                              ncol = ncol(prev_sets))) == length(S) - 1)
              res <- Ws[s] / sum(1 / (prev_P[idx_Spi]))
              res
            }
          }else{
            Ps[[ord]] <- rep(0, nbset) #Setting up the Ps results
            for(s in seq_len(nbset)){
              S <- c(cur_sets[, s]) #For every possible subset S of order ord
              #Find every S\{i} for all i: columns of prev_sets fully included in S
              idx_Spi <- which(colSums(matrix(prev_sets %in% S,
                                              nrow = length(S) - 1,
                                              ncol = ncol(prev_sets))) == length(S) - 1)
              Ps[[ord]][s] <- Ws[s] / sum(1 / (prev_P[idx_Spi])) #Recursively compute Ps
            }
          }
        }
      }
      vals <- rev(Ps[[d]] / Ps[[d - 1]])
    }

    if(has_null){
      #If spurious variable, we set their value to 0
      pmvd_ <- rep(0, p)
      pmvd_[setdiff(N, var_null)] <- as.vector(vals)
      vals <- pmvd_
    }
    pmvd_res <- matrix(vals, ncol = 1)
    rownames(pmvd_res) <- colnames(X)
    colnames(pmvd_res) <- c("original")

    P_out <- Ps
    conf_int_out <- NULL
  }else{
    ##################################
    # Bootstrap computation

    if(is.null(parl)){
      #Bootstrap confidence interval estimation
      boot_pmvd <- boot::boot(data = cbind(X),
                              statistic = calc.pmvd.rec.boot,
                              R = nboot,
                              y = y,
                              logistic = logistic,
                              indices = indices,
                              any.cat = any.cat,
                              tol = tol,
                              max.iter = max.iter,
                              stype = "i")
    }else{
      #Parallelized Bootstrap confidence interval estimation
      boot_pmvd <- boot::boot(data = cbind(X),
                              statistic = calc.pmvd.rec.boot,
                              R = nboot,
                              y = y,
                              logistic = logistic,
                              indices = indices,
                              any.cat = any.cat,
                              stype = "i",
                              parallel = "snow",
                              ncpus = parl,
                              tol = tol,
                              max.iter = max.iter,
                              cl = cl)
    }

    # bootstats() is defined elsewhere in the package; its first column is
    # used as the point estimate.
    res.boot <- bootstats(boot_pmvd, conf, "basic")
    rownames(res.boot) <- colnames(X)
    pmvd_res <- matrix(res.boot[, 1], ncol = 1)
    colnames(pmvd_res) <- c("original")
    rownames(pmvd_res) <- colnames(X)

    P_out <- NULL
    conf_int_out <- res.boot
  }

  #Preparing the output (list() keeps the NULL-valued elements)
  out <- list("call" = fn_call,
              "pmvd" = pmvd_res,
              "R2s" = R2s,
              "indices" = indices,
              "P" = P_out,
              "conf_int" = conf_int_out,
              "X" = X,
              "y" = y,
              "logistic" = logistic,
              "boot" = boot,
              "nboot" = nboot,
              "rank" = rank,
              "parl" = parl,
              "conf" = conf)

  class(out) <- "pmvd"

  return(out)
}

#######################################
# Custom methods
print.pmvd <- function(x, ...) {
  # Print method for objects of class "pmvd":
  #   shows the recorded call, a one-line description of the model, then
  #   either the bootstrap table (x$conf_int) or the point estimates (x$pmvd).
  cat("\nCall:\n", deparse(x$call), "\n", sep = "")

  header <- if (x$logistic) {
    "\nPMVD decomposition of R2 for logistic model\n"
  } else {
    "\nPMVD decomposition of R2 for linear model\n"
  }
  cat(header)

  # Bootstrap results take precedence over the plain estimates
  shown <- if (x$boot) x$conf_int else x$pmvd
  print(shown)
}


plot.pmvd <- function(x, ylim = c(0, 1), ...) {
  # Plot method for objects of class "pmvd":
  #   draws the PMVD values (x$pmvd) against the input index, labels the
  #   x axis with the column names of x$X and, when x$boot is TRUE, adds one
  #   vertical segment per input between the "min. c.i." and "max. c.i."
  #   columns of x$conf_int.
  # Inputs:
  #   x: object of class "pmvd".
  #   ylim: y-axis limits.
  #   ...: further arguments forwarded to plot().
  plot_title <- if (x$logistic) {
    "PMVD decomposition of R2 for logistic model"
  } else {
    "PMVD decomposition of R2 for linear model"
  }

  # Axes are suppressed here and drawn by hand so that the x axis carries
  # the input names instead of numeric ticks.
  plot(x$pmvd,
       ylim = ylim,
       axes = FALSE,
       xlab = "Inputs",
       ylab = "PMVD",
       main = plot_title,
       ...)
  axis(2)
  axis(1, at = seq_along(x$pmvd), labels = colnames(x$X))
  box()
  graphics::grid()

  if (!x$boot) {
    legend("topright",
           pch = 1,
           legend = c("PMVD Values"),
           bg = "white")
  } else {
    input_pos <- 1:ncol(x$X)
    ci <- x$conf_int
    segments(x0 = input_pos,
             y0 = ci$`min. c.i.`,
             x1 = input_pos,
             y1 = ci$`max. c.i.`)
    legend("topright",
           pch = c(1, NA),
           lty = c(NA, 1),
           legend = c("PMVD values",
                      paste(x$conf * 100, "% Confidence interval", sep = "")),
           bg = "white")
  }
}
Any scripts or data that you put into this service are public.
sensitivity documentation built on Sept. 11, 2024, 9:09 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
sensitivity
Global Sensitivity Analysis of Model Outputs and Importance Measures

R/pmvd.R
In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures

Defines functions plot.pmvd print.pmvd pmvd calc.pmvd.rec.boot estim.R2

Documented in plot.pmvd pmvd print.pmvd

Try the sensitivity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sensitivity Global Sensitivity Analysis of Model Outputs and Importance Measures

R/pmvd.R In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures

Defines functions plot.pmvd print.pmvd pmvd calc.pmvd.rec.boot estim.R2

Documented in plot.pmvd pmvd print.pmvd

Try the sensitivity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sensitivity
Global Sensitivity Analysis of Model Outputs and Importance Measures

R/pmvd.R
In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures