R/shapleysobol_knn.R
In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures

Documented in extract.shapleysobol_knn ggplot.shapleysobol_knn plot.shapleysobol_knn plot.sobol_knn print.shapleysobol_knn print.sobol_knn shapleysobol_knn tell.shapleysobol_knn

############################################
# Needed packages
# library(parallel)
# library(doParallel)
# library(foreach)
# library(gtools)
# library(boot)
# library(RANN)
# library(whitening)
# library(TSP)

#########################
#Input rescaling function
rescale_inputs <- function(X){
  # Mahalanobis whitening, new X has unit diagonal covariance matrix
  X <- whitening::whiten(X, method="ZCA-cor")
  # We then apply a copula transform
  X <- apply(X,2,rank)/nrow(X)
  return(X)
}

############################################
# Var(E[Y|X_A])/Var(Y) estimation by Rank/TSP
estim.VE.rank<-function(dataX, y, subset, n){
  #Subsetting X
  X<-dataX[, subset, drop=F]
  any.cat<-any(sapply(X,class)=="factor")
  p=ncol(X)
  y=c(y)
  
  if(any.cat){
    id.cat=which(sapply(X,class)=="factor")
    id.cont <- setdiff(1:p,id.cat)
    Xtemp <- as.matrix(X[,id.cont,drop=FALSE])
    for (i in 1:length(id.cat)){
      X.factor<-X[,id.cat[i], drop=F]
      df.temp <- X[,id.cat[i],drop=FALSE]; colnames(df.temp) <- "X"
      # Normalization to ensure that two different rows will have unit euclidean distance
      xx.temp <- model.matrix(~X-1,df.temp)*sqrt(2)/2
      Xtemp<-cbind(Xtemp, xx.temp)
    }
    X<-Xtemp
  }
  
  # Use ranking or TSP
  if (ncol(X)>1){
    # Use TSP
    dist.mat <- as.matrix(dist(X))
    tsp <- TSP::TSP(dist.mat)
    tour <- as.integer(TSP::solve_TSP(tsp))
    id.sort <- cbind(tour,tour[c(2:n,1)])
  }else{
    # Use ranking
    id.sort <- sort(X[,1], index.return = TRUE)$ix
    id.sort <- matrix(c(id.sort,id.sort[c(2:n,1)]),ncol=2)
  }
  
  var.cond<-apply(matrix(y[id.sort],ncol=2),1,var)
  
  #Returning an estimate of Var(E[Y|X_A])/Var(Y)
  VE_A<-1-mean(var.cond)/var(y)
  return(VE_A)
}

############################################
# Var(E[Y|X_A])/Var(Y) estimation by KNN
estim.VE.knn<-function(dataX, y, subset, n, n.knn, n.limit){
  #Subsetting X
  X<-dataX[, subset, drop=F]
  any.cat<-any(sapply(X,class)=="factor")
  p=ncol(X)
  y=c(y)
  
  #One Hot Encoding of categorical variables
  if(any.cat){
    id.cat=which(sapply(X,class)=="factor")
    id.cont <- setdiff(1:p,id.cat)
    Xtemp <- as.matrix(X[,id.cont,drop=FALSE])
    for (i in 1:length(id.cat)){
      X.factor<-X[,id.cat[i], drop=F]
      df.temp <- X[,id.cat[i],drop=FALSE]; colnames(df.temp) <- "X"
      # Normalization to ensure that two different rows will have unit euclidean distance
      xx.temp <- model.matrix(~X-1,df.temp)*sqrt(2)/2
      Xtemp<-cbind(Xtemp, xx.temp)
    }
    X<-Xtemp
  }
  if(n<=n.limit){
    ###############################################
    # When n is reasonable, compute exact distances
    
    #Computing distance matrix
    dist.mat<-as.matrix(dist(X))
    #Getting the n.knn closest points for each datapoint
    id.sort <- t(apply(dist.mat,2,order)[1:n.knn,])
  }else{
    ###############################################
    # When n is too large, compute approximated NN
    
    #Getting the approximated n.knn closest points for each datapoint
    id.sort <- RANN::nn2(X,k=n.knn)$nn.idx
  }
  #Estimating Var(Y|X_A)
  var.cond<-apply(matrix(y[id.sort],ncol=n.knn),1,var)
  
  #Returning an estimate of Var(E[Y|X_A])/Var(Y)
  VE_A<-1-mean(var.cond)/var(y)
  return(VE_A)
}

###########################################
# Computing Shapley effects via KNN 
calc.shapknn<-function(X, y, method, n.perm, perms, d, indices, comb_weights, parl, clus, boot, n.limit, n.knn, noise, i=1:nrow(X), get.sob=F){
  ###############################
  #Pre-Processing
  #Row selection (for Bootstrap estimate, default doesn't change anything)
  if(boot){
    y<-X[,1]
    X<-X[,-1]
  }
  n<-nrow(X)
  
  #List of VEs, arranged by orders (same structure as the indices object)
  if(get.sob){
    VEs<-rep(list(0), length(indices))
    d<-length(indices)-1
  }else{
    VEs<-rep(list(0), d+1)
  }

  
  ####################################
  #Conditional elements estimation
  #For every order, the corresponding VE estimate of each
  #subset of indices are stored in VEs
  if(noise){
    #The Shapley sum up to V(E[Y|X_D])/V(Y) with D={1,...,d} (all variables)
    #which may not be equal to 1
    for(j in 1:d){
      if(method=='knn'){
        #############################################
        # KNN Computation of the conditional elements
        if(!is.null(parl)){
          #############################
          #Parallelized VE estimation
          VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.knn,
                                         dataX=X,
                                         y=y,
                                         n=n,
                                         n.knn=n.knn,
                                         n.limit=n.limit)
          
        }else{
          #############################
          # VE estimation
          VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.knn,
                            dataX=X,
                            y=y,
                            n=n,
                            n.knn=n.knn,
                            n.limit=n.limit)
        }
      }else if(method=='rank'){
        ##################################################
        # Rank/TSP Computation of the conditional elements
        if(!is.null(parl)){
          #############################
          #Parallelized VE estimation
          VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.rank,
                                         dataX=X,
                                         y=y,
                                         n=n)
          
        }else{
          #############################
          # VE estimation
          VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.rank,
                            dataX=X,
                            y=y,
                            n=n)
        }
      }

    }
  }else{
    #Forces the Shapley effects to sum up to 1
    for(j in 1:(d-1)){
      if(method=='knn'){
        ##################################################
        # KNN Computation of the conditional elements
        if(!is.null(parl)){
          #############################
          #Parallelized VE estimation
          VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.knn,
                                         dataX=X,
                                         y=y,
                                         n=n,
                                         n.knn=n.knn,
                                         n.limit=n.limit)
          
        }else{
          #############################
          # VE estimation
          VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.knn,
                            dataX=X,
                            y=y,
                            n=n,
                            n.knn=n.knn,
                            n.limit=n.limit)
        }
    
      }else if (method=="rank"){
        ##################################################
        # Rank/TSP Computation of the conditional elements
        if(!is.null(parl)){
          #############################
          #Parallelized VE estimation
          VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.rank,
                                         dataX=X,
                                         y=y,
                                         n=n)
          
        }else{
          #############################
          # VE estimation
          VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.rank,
                            dataX=X,
                            y=y,
                            n=n)
        }
      }
    }  
  VEs[[d+1]]=1
  }
  if(get.sob){
    #Only computing the Sobol' indices
    return(unlist(VEs)[-1])
  }
  ###############################
  #Affecting the weights
  if(!is.null(parl)){
    ################################
    #Parallelized weight affectation
    if(is.null(n.perm)){
      #One core per input
      Shaps <- foreach::foreach(i=1:d, .combine=cbind)%dopar%{
        res=0
        #For every order, the increments are weighted and summed
        #for the input i
        for(ord in 1:d){
          if(ord==1){
            idx<-which(indices[[ord+1]]==i)
            res=res+comb_weights[ord]*VEs[[ord+1]][as.vector(idx)]
          }else{
            #Columns of subsets of order ord+1 containing var_j
            idx_j<-which(indices[[ord+1]]==i, arr.ind=T)[,2]
            #Columns of subsets of order ord without j
            idx_woj<-which(apply(indices[[ord]]!=i, 2, all))
            #Total increment value for order ord
            tot_incr<-sum(VEs[[ord+1]][as.vector(idx_j)]) - sum(VEs[[ord]][as.vector(idx_woj)])
            res=res+comb_weights[ord]*tot_incr
          }
        }
        res
      }
    }else{
      #Parallelized randperm
      Shaps <- foreach::foreach(i=1:d, .combine=cbind)%dopar%{
        res=0
        for(pm in 1:n.perm){
          perm=perms[,pm]
          ord=which(perm==i)
          if(ord!=1){
            idx_j=which(colSums(matrix(apply(indices[[ord+1]], 2 , function(x) perm[1:ord] %in% x), ncol=ncol(indices[[ord+1]])))==ord)
            idx_woj=which(colSums(matrix(apply(indices[[ord]], 2 , function(x) perm[1:(ord-1)] %in% x), ncol=ncol(indices[[ord]])))==ord-1)
            res=res+VEs[[ord+1]][idx_j]-VEs[[ord]][idx_woj]
          }else{
            idx_j=which(indices[[ord+1]]==i)
            res=res+VEs[[ord+1]][idx_j]
          }
        }
        res
      }
    }

  }else{
    ################################
    #Unparallelized weight affectation
    Shaps<-rep(0,d)
    if(is.null(n.perm)){
      for(var_j in 1:d){
        #For every order, the increments are weighted and summed
        #for the input var_j
        for(ord in 1:d){
          if(ord==1){
            idx<-which(indices[[ord+1]]==var_j)
            Shaps[var_j]=Shaps[var_j]+comb_weights[ord]*VEs[[ord+1]][as.vector(idx)]
          }else{
            #Columns of subsets of order ord+1 containing var_j
            idx_j<-which(indices[[ord+1]]==var_j, arr.ind=T)[,2]
            #Columns of subsets of order ord without j
            idx_woj<-which(apply(indices[[ord]]!=var_j, 2, all))
            #Total increment value for order ord
            tot_incr<-sum(VEs[[ord+1]][as.vector(idx_j)]) - sum(VEs[[ord]][as.vector(idx_woj)])
            Shaps[var_j]=Shaps[var_j]+comb_weights[ord]*tot_incr
          }
        }
      }
      
    }else{
      #Unparallelized randperm
      for(var_j in 1:d){
        for(pm in 1:n.perm){
          perm=perms[,pm]
          ord=which(perm==var_j)
          if(ord!=1){
            idx_j=which(colSums(matrix(apply(indices[[ord+1]], 2 , function(x) perm[1:ord] %in% x), ncol=ncol(indices[[ord+1]])))==ord)
            idx_woj=which(colSums(matrix(apply(indices[[ord]], 2 , function(x) perm[1:(ord-1)] %in% x), ncol=ncol(indices[[ord]])))==ord-1)
            Shaps[var_j]=Shaps[var_j]+VEs[[ord+1]][idx_j]-VEs[[ord]][idx_woj]
          }else{
            idx_j=which(indices[[ord+1]]==var_j)
            Shaps[var_j]=Shaps[var_j]+VEs[[ord+1]][idx_j]
          }
        }
      }
    }

  }
  if(is.null(n.perm)){
    Shaps=Shaps/d
  }else{
    Shaps=Shaps/n.perm
  }

  
  if(boot){
    #If Bootstrap computation, return the Shapley Effects
    return(Shaps)
  }else{
    #For non-bootstrap computation, return conditional elements and Shapley Effects
    return(list("VE"=VEs,
                "Shap"=Shaps))
  }
}

shapleysobol_knn <- function(model=NULL, X, method="knn", n.knn = 2, n.limit = 2000, U=NULL, n.perm=NULL, noise=F, rescale=F, nboot = NULL, boot.level = 0.8, conf=0.95, parl=NULL, ...){
  ###########################################
  #Pre-processing
  d<-ncol(X)
  n<-nrow(X)
  
  ###########################################
  #Computing y
  if(!is.null(model)){
    y=model(X,...)
  }else{
    y=NULL
  }
  ##########################################
  #Arguments check
  
  #X
#  if(!(class(X)[1]=="matrix" | class(X)[1]=="data.frame")){
  if(!(inherits(X, "matrix") | inherits(X, "data.frame"))){
    stop("X must be either a matrix of a data frame.")
  }
  #Setting-up colnames
  if(is.null(colnames(X))){
    colnames(X)<-paste("X", 1:d, sep="")
  }
  #Method
  if(!is.element(method, c("rank", "knn"))){
    stop("method must either be rank or knn.")
  }
  
  if(!is.null(n.perm)){
    if(!is.numeric(n.perm)){
      stop("n.perm must be a positive integer.")
    }else if(n.perm>=factorial(d)){
      warning("The number of random permutations is greater than the total number of permutation. Defaulted to n.perm=NULL.")
      n.perm=NULL
    }else if(n.perm<=0){
      stop("n.perm must be a positive integer.")
    }
    else if(n.perm%%1!=0){
      warning("n.perm must be a positive integer. Defaulted to its floor value.")
      n.perm=floor(n.perm)
    }
  }
  #Extracting Sobols
  if(!is.null(U)){
#    if(!class(U)[1]%in%c("numeric","matrix","integer", "list")){
    if(!inherits(U, "numeric") && !inherits(U, "matrix") && !inherits(U, "integer") && typeof(U) != "list"){
      stop("U must be either 0 (Total Sobol' indices), 1 (First-Order indices), a list of subsets, or a matrix.")
#    }else if(class(U)[1]%in%c("integer","numeric")){
    }else if(inherits(U, "integer") || inherits(U,"numeric")){
      if(!U%in%c(0,1)){
        stop("U must be either 0 (Total Sobol' indices), 1 (First-Order indices), a list of subsets, or a matrix.")
      }
    } 
    if(inherits(U, "matrix")){
      if(ncol(U)!=d){
        stop("U must have exactly ncol(X) columns.")
      }
    }
  }
  #Bootstrap Checks
  if(!is.null(nboot)){
    if(!is.numeric(nboot)){
      stop("The 'nboot' argument must be a positive integer.")
    }else if (nboot%%1!=0 | nboot<0 | length(nboot)!=1){
      stop("The 'nboot' argument must be a positive integer.")
    }else if(nboot==0){
      boot=F
    }else{
      boot=T
    }
  }else{
    boot=F
  }

  if(!is.numeric(boot.level)){
    stop("The 'boot.level' argument must be a numeric argument strictly between 0 and 1.")
  }else if(boot.level<=0 | boot.level >=1){
    stop("The 'boot.level' argument must be a numeric argument strictly between 0 and 1.")
  }
  
  #parl
  if(!is.numeric(parl)){
    if(!is.null(parl)){
      stop("The 'parl' argument must be NULL or a positive integer.")
    }
  }else if(parl%%1!=0 | parl<0 | length(parl)!=1){
    stop("The 'parl' argument must be NULL or a positive integer.")
  }else if (parl>parallel::detectCores()){
    parl=parallel::detectCores()-1
    warning(paste("Too many cores specified. Defaulted to ", parl, " cores.", sep=""))
  }
  
  if(!is.null(y)){
    out<-compute.shapleysobol_knn(X,
                                y,
                                method,
                                n.knn ,
                                n.limit,
                                U,
                                n.perm,
                                noise,
                                rescale,
                                nboot,
                                boot.level,
                                conf,
                                parl,
                                boot)
    out$call=match.call()
  }else{
    out<-list("call"=match.call(),
              "Shap"=NULL,
              "VE"=NULL,
              "indices"=NULL,
              "method"=method,
              "n.perm"=n.perm,
              "w"=NULL,
              "conf_int"=NULL,
              "X"=X,
              "y"=y,
              "n.knn"=n.knn,
              "U"=U,
              "rescale"=rescale,
              "n.limit="=n.limit,
              "boot.level"=boot.level,
              "noise"=noise,
              "boot"=boot,
              "nboot"=nboot,
              "parl"=parl,
              "conf"=conf
    )
    class(out)<-"shapleysobol_knn"
  }
  return(out)
}


compute.shapleysobol_knn<-function(X, y, method, n.knn, n.limit, U, n.perm, noise, rescale, nboot, boot.level, conf, parl, boot){ 
  ###########################################
  #Pre-processing
  d<-ncol(X)
  n<-nrow(X)
  
  ########################################
  #Data Processing
  
  #Identifying categorical inputs
  is.cat<-sapply(1:d, function(x)"factor"%in%class(X[,x]))
  #Rescaling the inputs if specified
  if(rescale & ncol(X[,!is.cat])>1){
    X[,!is.cat]=rescale_inputs(as.matrix(X[,!is.cat]))
  }
  
  #########################################
  #Computing Shapleys
  if(is.null(U)){

  #List of subsets by order ([[2]]: subsets of order 1...etc)
  indices<-rep(list(0), d+1)
  #Weights vector, each element is the weight for |A| ([1]:|A|=0, [2]:|A|=1, ...)
  comb_weights<-rep(0,d)
  
  #Computing subsets and weights
  if(is.null(n.perm)){
    perms=NULL
    ##########################
    #All permutations are used
    for(j in 1:d){
      indices[[j+1]]<-t(gtools::combinations(n=d, r=j))
      comb_weights[j]<-1/choose(n=(d-1), j-1)
    }
  }else{
    comb_weights<-rep(0,d)
    perms=matrix(NA, nrow=d, ncol=n.perm)
    #####################################
    # n.perm random permutations are used
    for(j in 1:d){
      indices[[j+1]]<-matrix(NA, nrow=j, ncol=choose(n=d, j))
      comb_weights[j]<-1
    }
    for(pm in 1:n.perm){
      #We sample a permutation of {1,...,d}
      perm<-sample.int(d,d)
      perms[,pm]<-perm
      #We fill indices accordingly
      for(j in 1:d){
        # subset of order j from the selected random permutation perm
        subs<-perm[1:j]
        if(any(is.na(indices[[j+1]][,1]))){
          #If it's the first subset of order j in indices
          indices[[j+1]][,1]<-subs
        }else{
          chk.sub=colSums(matrix(apply(indices[[j+1]], 2, function(x) subs %in% x), ncol=ncol(indices[[j+1]])))==j
          if(!any(chk.sub)){
              #If the subset is not already in indices, we add it
              id.sub<-max(which(colSums(matrix(is.na(indices[[j+1]]), ncol=ncol(indices[[j+1]])))==0))+1
              indices[[j+1]][,id.sub]<-subs
          }
        }
      }
    }
    #Removing NAs
    for(j in 1:d){
      indices[[j+1]]<-matrix(indices[[j+1]][,stats::complete.cases(t(indices[[j+1]]))], nrow=j)
    }
  }


  
  ########################################
  #Shap KNN Computation
  
  #Preparing parallel cluster
  if(!is.null(parl)){
    cl=parallel::makeCluster(parl)
    doParallel::registerDoParallel(cl)
  }else{
    cl=NULL
  }

  res_Shap<-calc.shapknn(X=X,
                        y=y,
                        method=method,
                        n.perm=n.perm,
                        perms=perms,
                        d=d,
                        indices=indices,
                        comb_weights=comb_weights,
                        parl=parl,
                        clus=cl,
                        boot=F,
                        n.limit=n.limit,
                        n.knn=n.knn,
                        noise=noise)
  
  res_Shap$Shap<-matrix(res_Shap$Shap, ncol=1)
  colnames(res_Shap$Shap)<-"original"
  rownames(res_Shap$Shap)<-colnames(X)
  
  out<-list("call"=match.call(),
            "Shap"=res_Shap$Shap,
            "VE"=res_Shap$VE,
            "indices"=indices,
            "method"=method,
            "n.perm"=n.perm,
            "w"=comb_weights,
            "conf_int"=NULL,
            "X"=X,
            "y"=y,
            "n.knn"=n.knn,
            "rescale"=rescale,
            "n.limit="=n.limit,
            "boot.level"=boot.level,
            "noise"=noise,
            "boot"=boot,
            "nboot"=nboot,
            "parl"=parl,
            "conf"=conf
  )
                           
  if(boot){
    #TODO Mieux travailler le warning.
    warning("Bootstrap confidence intervals may not be theoretically sound.")
    boot.size<-round(n*boot.level)
    sample.boot <- function(data,mle){
      out <- data[sample.int(n=nrow(data), size=mle),,drop=FALSE]
      return(out)
    }
    
    if(!is.null(parl)){
      res.boot <- boot::boot(data=cbind(y,X), statistic=calc.shapknn, 
                             R = nboot, 
                             sim = "parametric", 
                             ran.gen = sample.boot, 
                             mle = boot.size,
                             cl=cl,
                             n.perm=n.perm,
                             perms=perms,
                             ncpus=parl,
                             parallel="snow",
                             y=y,
                             d=d,
                             method=method,
                             indices=indices,
                             comb_weights=comb_weights,
                             parl=NULL,
                             clus=NULL,
                             boot=T,
                             n.limit=n.limit,
                             n.knn=n.knn,
                             noise=noise)
    }else{
      res.boot <- boot::boot(data=cbind(y,X), statistic=calc.shapknn, 
                             R = nboot, 
                             sim = "parametric", 
                             ran.gen = sample.boot, 
                             mle = boot.size,
                             y=y,
                             d=d,
                             method=method,
                             n.perm=n.perm,
                             perms=perms,
                             indices=indices,
                             comb_weights=comb_weights,
                             parl=NULL,
                             clus=NULL,
                             boot=T,
                             n.limit=n.limit,
                             n.knn=n.knn,
                             noise=noise)
      
    }
    
    out$conf_int<-bootstats(res.boot, conf, "basic")
    rownames(out$conf_int)<-colnames(X)
  }
  
  #Stopping the cluster after parallelized computation
  if(!is.null(parl)){parallel::stopCluster(cl)}
  
  #Customize the class of the output for custom methods
  class(out)<-"shapleysobol_knn"
  return(out)
  }else{
    ###########################################
    #Extracting Sobols
    
    #Formatting indices
#    if(class(U)[1]%in%c("integer", "numeric")){
    if(inherits(U, "integer") || inherits(U, "numeric")){
      if(U==0){
        #Only Total Sobol indices
        indices<-rep(list(0),2)
        mat.indices<-matrix(NA, nrow=d-1, ncol=d)
        for(i in 1:d){
          mat.indices[,i]<-setdiff(1:d, i)
        }
        indices[[2]]<-mat.indices
        cname<-"Total Sobol"
      }else if(U==1){
        #Only First-Order Sobol indices
        indices<-rep(list(0),2)
        indices[[2]]<-t(matrix(1:d))
        cname<-"Sobol"
      }
      #Extracting variables names
      rnames<-colnames(X)
    }else{
      #Computing Sobols of subsets
      if(inherits(U, "matrix")){
        #If U is a matrix, transform it to a list of subsets
        U<-apply(U,1,function(x) which(as.logical(x)))
        if(typeof(U)!="list"){
          U<-as.list(U)
        }
      }
      #When U is a list of subsets
      nb.subsets<-length(U)
      #Finding the number of different orders
      ords<-rep(0, nb.subsets)
      for(i in 1:nb.subsets){
        ords[i]<-length(U[[i]])
      }
      un.orders<-unique(ords)
      nb.orders<-length(un.orders)
      indices<-rep(list(0), nb.orders+1)
      #Putting the subsets in indices
      for(i in 1:nb.orders){
        curr.ord<-un.orders[i]
        curr.n.ord<-sum(ords==curr.ord)
        indices[[i+1]]<-matrix(NA, nrow=curr.ord, ncol=curr.n.ord)
        for(j in 1:length(which(ords==curr.ord))){
          sub<-which(ords==curr.ord)[j]
          indices[[i+1]][,j]<-U[[sub]]
        }
      }
      rnames<-unlist(sapply(indices[-1], function(x){
        res<-apply(x, 2, function(y) paste(colnames(X)[y], collapse="-"))
        return(res)
      }))
      cname<-"Closed Sobol"
    }  
    #From indices, extract Sobols
    if(!is.null(parl)){
      #If paralel, setting up the cluster
      cl=parallel::makeCluster(parl)
      doParallel::registerDoParallel(cl)
    }else{
      cl=NULL
    }
    S<-calc.shapknn(X=X, 
                 y=y, 
                 method=method, 
                 n.perm=n.perm, 
                 perms=NULL, 
                 d=d, 
                 indices=indices, 
                 comb_weights=NULL, 
                 parl=parl, 
                 clus=cl, 
                 boot=F, 
                 n.limit=n.limit, 
                 n.knn=n.knn, 
                 noise=T, 
                 i=1:nrow(X), 
                 get.sob=T)
    if(inherits(U, "integer") || inherits(U, "numeric")){
      if(U==0){
        S<-1-S
      }
    }
    S<-matrix(S, ncol=1)
    rownames(S)<-rnames
    colnames(S)<-cname
    out<-list("call"=match.call(),
              "Sobol"=S,
              "indices"=indices[-1],
              "method"=method,
              "conf_int"=NULL,
              "X"=X,
              "y"=y,
              "U"=U,
              "n.knn"=n.knn,
              "rescale"=rescale,
              "n.limit="=n.limit,
              "boot.level"=boot.level,
              "noise"=noise,
              "boot"=boot,
              "nboot"=nboot,
              "parl"=parl,
              "conf"=conf
    )
    class(out)<-"sobol_knn"
    
    if(boot){
      #TODO Mieux travailler le warning.
      warning("Bootstrap confidence intervals may not be theoretically sound.")
      boot.size<-round(n*boot.level)
      sample.boot <- function(data,mle){
        out <- data[sample.int(n=nrow(data), size=mle),,drop=FALSE]
        return(out)
      }
      
      if(!is.null(parl)){
        res.boot <- boot::boot(data=cbind(y,X), statistic=calc.shapknn, 
                               R = nboot, 
                               sim = "parametric", 
                               ran.gen = sample.boot, 
                               mle = boot.size,
                               cl=cl,
                               n.perm=NULL,
                               perms=NULL,
                               ncpus=parl,
                               parallel="snow",
                               y=y,
                               d=d,
                               method=method,
                               indices=indices,
                               comb_weights=NULL,
                               parl=NULL,
                               clus=NULL,
                               boot=T,
                               n.limit=n.limit,
                               n.knn=n.knn,
                               noise=T,
                               get.sob=T)
      }else{
        res.boot <- boot::boot(data=cbind(y,X), statistic=calc.shapknn, 
                               R = nboot, 
                               sim = "parametric", 
                               ran.gen = sample.boot, 
                               mle = boot.size,
                               y=y,
                               d=d,
                               method=method,
                               n.perm=NULL,
                               perms=NULL,
                               indices=indices,
                               comb_weights=NULL,
                               parl=NULL,
                               clus=NULL,
                               boot=T,
                               n.limit=n.limit,
                               n.knn=n.knn,
                               noise=T,
                               get.sob=T)
        
      }
      
      #Bootstraped results for Total indices
      out$conf_int<-bootstats(res.boot, conf, "basic")
      if(inherits(U, "integer") || inherits(U, "numeric")){
        if(U==0){
          out$conf_int[,c(1,4,5)]<-1-out$conf_int[,c(1,4,5)]
          #Switching min/max CI names
          colnames(out$conf_int)<-colnames(out$conf_int)[c(1,2,3,5,4)]
          out$conf_int<-out$conf_int[,c(1,2,3,5,4)]
        }
      }
      rownames(out$conf_int)<-rnames
    }
    #If parallel, stopping the cluster
    if(!is.null(parl)){parallel::stopCluster(cl)}
    return(out)
  }
}


#######################################
# Custom methods

print.shapleysobol_knn<-function(x, ...){
  cat("\nCall:\n", deparse(x$call), "\n", sep = "")
  if(x$method=="knn"){
    cat("\nShapley effects estimation by nearest-neighbor procedure\n")
  }else if(x$method=="rank"){
    cat("\nShapley effects estimation by ranking procedure\n")
  }


  if(x$boot){
    print(x$conf_int)
  }else{
    print(x$Shap)
  }
}


plot.shapleysobol_knn<-function(x, ylim=c(0,1), ...){
  if(x$method=="knn"){
    plot_title="Shapley effects estimation by nearest-neighbor procedure"
  }else if(x$method=="rank"){
    plot_title="Shapley effects estimation by ranking procedure"
  }
  
  plot(x$Shap,
       ylim=ylim,
       axes=F,
       xlab="Inputs",
       ylab="Shapley effects",
       main=plot_title,
       ...)
  axis(2)
  axis(1, at=seq_along(x$Shap), labels=colnames(x$X))
  box()
  graphics::grid()
  
  if(x$boot){
    segments(x0=1:ncol(x$X),
             y0=x$conf_int$`min. c.i.`,
             x1=1:ncol(x$X),
             y1=x$conf_int$`max. c.i.`)
    legend("topright",
           pch=c(1,NA),
           lty=c(NA,1),
           legend=c("Shapley effects",
                    paste(x$conf*100, "% Confidence interval", sep="")),
           bg="white")
  }else{
    legend("topright",
           pch=1,
           legend=c("Shapley effects"),
           bg="white")
  }
}

tell.shapleysobol_knn<-function(x,y,...){
  
  id.obj<-deparse(substitute(x))
  if (! is.null(y)) {
    x$y <- y
  }else if (is.null(x$y)) {
    stop("y not found.")
  }
  
  res<-compute.shapleysobol_knn(X=x$X,
                              y=x$y,
                              method=x$method,
                              n.knn=x$n.knn ,
                              n.limit=x$n.limit,
                              U=x$U,
                              n.perm=x$n.perm,
                              noise=x$noise,
                              rescale=x$rescale,
                              nboot=x$nboot,
                              boot.level=x$boot.level,
                              conf=x$conf,
                              parl=x$parl,
                              boot=x$boot)
  assign(id.obj, res, parent.frame())
  
  return(res)
}

extract.shapleysobol_knn<- function(x, ...){
  if (is.null(x$VE) | is.null(x$Shap)){
    stop("The Shapley effects must be computed (U=NULL) prior to extracting first-order and total Sobol' indices.")
  }
  d<-ncol(x$X)
  Sob<-as.matrix(x$VE[[2]], ncol=1)
  Sob.tot<-1-as.matrix(rev(x$VE[[d]]), ncol=1)
  colnames(Sob)<-"Sobol"
  rownames(Sob)<-rownames(x$Shap)
  colnames(Sob.tot)<-"Total Sobol"
  rownames(Sob.tot)<-rownames(x$Shap)
  return(list("Sobol"=Sob,
              "Sobol.tot"=Sob.tot))
}

#Plotting the Sobol' indices
plot.sobol_knn<-function(x, ylim=c(0,1), ...){
  if(inherits(x$U, "integer") || inherits(x$U, "numeric")){
    if(x$U==0){
      if(x$method=="knn"){
        plot_title=("\nTotal Sobol' indices estimation by nearest-neighbor procedure\n")
      }else if(x$method=="rank"){
        plot_title=("\nTotal Sobol' indices estimation by ranking procedure\n")
      }
      
    }else if(x$U==1){
      if(x$method=="knn"){
        plot_title=("\nFirst order Sobol' indices estimation by nearest-neighbor procedure\n")
      }else if(x$method=="rank"){
        plot_title=("\nFirst order Sobol' indices estimation by ranking procedure\n")
      }
    }
  }else{
    if(x$method=="knn"){
      plot_title=("\nClosed Sobol' indices estimation by nearest-neighbor procedure\n")
    }else if(x$method=="rank"){
      plot_title=("\nClosed Sobol' indices estimation by ranking procedure\n")
    }
  }
  
  plot(x$Sobol,
       ylim=ylim,
       axes=F,
       xlab="Inputs/Subsets",
       ylab="Sobol' indices",
       main=plot_title,
       ...)
  axis(2)
  axis(1, at=seq_along(x$Sobol), labels=rownames(x$Sobol))
  box()
  graphics::grid()
  
  if(x$boot){
    segments(x0=1:ncol(x$X),
             y0=x$conf_int$`min. c.i.`,
             x1=1:ncol(x$X),
             y1=x$conf_int$`max. c.i.`)
    legend("topright",
           pch=c(1,NA),
           lty=c(NA,1),
           legend=c("Sobol' indices",
                    paste(x$conf*100, "% Confidence interval", sep="")),
           bg="white")
  }else{
    legend("topright",
           pch=1,
           legend=c("Sobol' indices"),
           bg="white")
  }
}

#Printing the Sobol' indices
print.sobol_knn<-function(x, ...){
  cat("\nCall:\n", deparse(x$call), "\n", sep = "")
  if(inherits(x$U, "integer") || inherits(x$U, "numeric")){
    if(x$U==0){
      if(x$method=="knn"){
        cat("\n(independent) Total Sobol' indices estimation by nearest-neighbor\n")
      }else if(x$method=="rank"){
        cat("\n(independent) Total Sobol' indices estimation by ranking\n")
      }
      
    }else if(x$U==1){
      if(x$method=="knn"){
        cat("\n(full) First order Sobol' indices estimation by nearest-neighbor\n")
      }else if(x$method=="rank"){
        cat("\n(full) First order Sobol' indices estimation by ranking\n")
      }
    }
  }else{
    if(x$method=="knn"){
      cat("\nClosed Sobol' indices estimation by nearest-neighbor\n")
    }else if(x$method=="rank"){
      cat("\nClosed Sobol' indices estimation by ranking\n")
    }
  }
  
  if(x$boot){
    print(x$conf_int)
  }else{
    print(x$Sobol)
  }
}

ggplot.shapleysobol_knn<-function(data, mapping = aes(), ylim = c(0,1), ..., environment = parent.frame()) {
  x <- data
  if (!is.null(x$Shap)){
    shap <- as.data.frame(x$Shap) 
    colnames(shap) <- "original"
    nodeggplot(list(shap), xname = "Shapley effects", ylim = ylim)
  }else{
    stop("No plot available")
  }
}

ggplot.sobol_knn<-function(data, mapping = aes(), ylim = c(0,1), ..., environment = parent.frame()){
  x <- data
  if(!is.null(x$Sobol)){
    sobols<-as.data.frame(x$Sobol)
    colnames(sobols)<-"original"
    if(typeof(x$U)=="list"){
      x_name<-"Closed Sobol'"
    }else{
      if(x$U==1){
        x_name<-"(full) First Order Sobol'"
      }else if (x$U==0){
        x_name<-"(independent) Total Sobol'"
      }
    }
    nodeggplot(list(sobols), xname=x_name, ylim=ylim)
  }else{
    stop("No plot available")
  }

}
Any scripts or data that you put into this service are public.
sensitivity documentation built on Nov. 5, 2025, 6:55 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
sensitivity
Global Sensitivity Analysis of Model Outputs and Importance Measures

R/shapleysobol_knn.R
In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures

Defines functions ggplot.sobol_knn ggplot.shapleysobol_knn print.sobol_knn plot.sobol_knn extract.shapleysobol_knn tell.shapleysobol_knn plot.shapleysobol_knn print.shapleysobol_knn compute.shapleysobol_knn shapleysobol_knn calc.shapknn estim.VE.knn estim.VE.rank rescale_inputs

Documented in extract.shapleysobol_knn ggplot.shapleysobol_knn plot.shapleysobol_knn plot.sobol_knn print.shapleysobol_knn print.sobol_knn shapleysobol_knn tell.shapleysobol_knn

Try the sensitivity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sensitivity Global Sensitivity Analysis of Model Outputs and Importance Measures

R/shapleysobol_knn.R In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures

Defines functions ggplot.sobol_knn ggplot.shapleysobol_knn print.sobol_knn plot.sobol_knn extract.shapleysobol_knn tell.shapleysobol_knn plot.shapleysobol_knn print.shapleysobol_knn compute.shapleysobol_knn shapleysobol_knn calc.shapknn estim.VE.knn estim.VE.rank rescale_inputs

Documented in extract.shapleysobol_knn ggplot.shapleysobol_knn plot.shapleysobol_knn plot.sobol_knn print.shapleysobol_knn print.sobol_knn shapleysobol_knn tell.shapleysobol_knn

Try the sensitivity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sensitivity
Global Sensitivity Analysis of Model Outputs and Importance Measures

R/shapleysobol_knn.R
In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures