R/shapleysobol_knn.R

Defines functions ggplot.sobol_knn ggplot.shapleysobol_knn print.sobol_knn plot.sobol_knn extract.shapleysobol_knn tell.shapleysobol_knn plot.shapleysobol_knn print.shapleysobol_knn compute.shapleysobol_knn shapleysobol_knn calc.shapknn estim.VE.knn estim.VE.rank rescale_inputs

Documented in extract.shapleysobol_knn ggplot.shapleysobol_knn plot.shapleysobol_knn plot.sobol_knn print.shapleysobol_knn print.sobol_knn shapleysobol_knn tell.shapleysobol_knn

############################################
# Needed packages
# library(parallel)
# library(doParallel)
# library(foreach)
# library(gtools)
# library(boot)
# library(RANN)
# library(whitening)
# library(TSP)

#########################
#Input rescaling function
rescale_inputs <- function(X){
  # Mahalanobis whitening, new X has unit diagonal covariance matrix
  X <- whitening::whiten(X, method="ZCA-cor")
  # We then apply a copula transform
  X <- apply(X,2,rank)/nrow(X)
  return(X)
}

############################################
# Var(E[Y|X_A])/Var(Y) estimation by Rank/TSP
estim.VE.rank<-function(dataX, y, subset, n){
  #Subsetting X
  X<-dataX[, subset, drop=F]
  any.cat<-any(sapply(X,class)=="factor")
  p=ncol(X)
  y=c(y)
  
  if(any.cat){
    id.cat=which(sapply(X,class)=="factor")
    id.cont <- setdiff(1:p,id.cat)
    Xtemp <- as.matrix(X[,id.cont,drop=FALSE])
    for (i in 1:length(id.cat)){
      X.factor<-X[,id.cat[i], drop=F]
      df.temp <- X[,id.cat[i],drop=FALSE]; colnames(df.temp) <- "X"
      # Normalization to ensure that two different rows will have unit euclidean distance
      xx.temp <- model.matrix(~X-1,df.temp)*sqrt(2)/2
      Xtemp<-cbind(Xtemp, xx.temp)
    }
    X<-Xtemp
  }
  
  # Use ranking or TSP
  if (ncol(X)>1){
    # Use TSP
    dist.mat <- as.matrix(dist(X))
    tsp <- TSP::TSP(dist.mat)
    tour <- as.integer(TSP::solve_TSP(tsp))
    id.sort <- cbind(tour,tour[c(2:n,1)])
  }else{
    # Use ranking
    id.sort <- sort(X[,1], index.return = TRUE)$ix
    id.sort <- matrix(c(id.sort,id.sort[c(2:n,1)]),ncol=2)
  }
  
  var.cond<-apply(matrix(y[id.sort],ncol=2),1,var)
  
  #Returning an estimate of Var(E[Y|X_A])/Var(Y)
  VE_A<-1-mean(var.cond)/var(y)
  return(VE_A)
}

############################################
# Var(E[Y|X_A])/Var(Y) estimation by KNN
estim.VE.knn<-function(dataX, y, subset, n, n.knn, n.limit){
  #Subsetting X
  X<-dataX[, subset, drop=F]
  any.cat<-any(sapply(X,class)=="factor")
  p=ncol(X)
  y=c(y)
  
  #One Hot Encoding of categorical variables
  if(any.cat){
    id.cat=which(sapply(X,class)=="factor")
    id.cont <- setdiff(1:p,id.cat)
    Xtemp <- as.matrix(X[,id.cont,drop=FALSE])
    for (i in 1:length(id.cat)){
      X.factor<-X[,id.cat[i], drop=F]
      df.temp <- X[,id.cat[i],drop=FALSE]; colnames(df.temp) <- "X"
      # Normalization to ensure that two different rows will have unit euclidean distance
      xx.temp <- model.matrix(~X-1,df.temp)*sqrt(2)/2
      Xtemp<-cbind(Xtemp, xx.temp)
    }
    X<-Xtemp
  }
  if(n<=n.limit){
    ###############################################
    # When n is reasonable, compute exact distances
    
    #Computing distance matrix
    dist.mat<-as.matrix(dist(X))
    #Getting the n.knn closest points for each datapoint
    id.sort <- t(apply(dist.mat,2,order)[1:n.knn,])
  }else{
    ###############################################
    # When n is too large, compute approximated NN
    
    #Getting the approximated n.knn closest points for each datapoint
    id.sort <- RANN::nn2(X,k=n.knn)$nn.idx
  }
  #Estimating Var(Y|X_A)
  var.cond<-apply(matrix(y[id.sort],ncol=n.knn),1,var)
  
  #Returning an estimate of Var(E[Y|X_A])/Var(Y)
  VE_A<-1-mean(var.cond)/var(y)
  return(VE_A)
}

###########################################
# Computing Shapley effects via KNN 
calc.shapknn<-function(X, y, method, n.perm, perms, d, indices, comb_weights, parl, clus, boot, n.limit, n.knn, noise, i=1:nrow(X), get.sob=F){
  ###############################
  #Pre-Processing
  #Row selection (for Bootstrap estimate, default doesn't change anything)
  if(boot){
    y<-X[,1]
    X<-X[,-1]
  }
  n<-nrow(X)
  
  #List of VEs, arranged by orders (same structure as the indices object)
  if(get.sob){
    VEs<-rep(list(0), length(indices))
    d<-length(indices)-1
  }else{
    VEs<-rep(list(0), d+1)
  }

  
  ####################################
  #Conditional elements estimation
  #For every order, the corresponding VE estimate of each
  #subset of indices are stored in VEs
  if(noise){
    #The Shapley sum up to V(E[Y|X_D])/V(Y) with D={1,...,d} (all variables)
    #which may not be equal to 1
    for(j in 1:d){
      if(method=='knn'){
        #############################################
        # KNN Computation of the conditional elements
        if(!is.null(parl)){
          #############################
          #Parallelized VE estimation
          VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.knn,
                                         dataX=X,
                                         y=y,
                                         n=n,
                                         n.knn=n.knn,
                                         n.limit=n.limit)
          
        }else{
          #############################
          # VE estimation
          VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.knn,
                            dataX=X,
                            y=y,
                            n=n,
                            n.knn=n.knn,
                            n.limit=n.limit)
        }
      }else if(method=='rank'){
        ##################################################
        # Rank/TSP Computation of the conditional elements
        if(!is.null(parl)){
          #############################
          #Parallelized VE estimation
          VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.rank,
                                         dataX=X,
                                         y=y,
                                         n=n)
          
        }else{
          #############################
          # VE estimation
          VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.rank,
                            dataX=X,
                            y=y,
                            n=n)
        }
      }

    }
  }else{
    #Forces the Shapley effects to sum up to 1
    for(j in 1:(d-1)){
      if(method=='knn'){
        ##################################################
        # KNN Computation of the conditional elements
        if(!is.null(parl)){
          #############################
          #Parallelized VE estimation
          VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.knn,
                                         dataX=X,
                                         y=y,
                                         n=n,
                                         n.knn=n.knn,
                                         n.limit=n.limit)
          
        }else{
          #############################
          # VE estimation
          VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.knn,
                            dataX=X,
                            y=y,
                            n=n,
                            n.knn=n.knn,
                            n.limit=n.limit)
        }
    
      }else if (method=="rank"){
        ##################################################
        # Rank/TSP Computation of the conditional elements
        if(!is.null(parl)){
          #############################
          #Parallelized VE estimation
          VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.rank,
                                         dataX=X,
                                         y=y,
                                         n=n)
          
        }else{
          #############################
          # VE estimation
          VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.rank,
                            dataX=X,
                            y=y,
                            n=n)
        }
      }
    }  
  VEs[[d+1]]=1
  }
  if(get.sob){
    #Only computing the Sobol' indices
    return(unlist(VEs)[-1])
  }
  ###############################
  #Affecting the weights
  if(!is.null(parl)){
    ################################
    #Parallelized weight affectation
    if(is.null(n.perm)){
      #One core per input
      Shaps <- foreach::foreach(i=1:d, .combine=cbind)%dopar%{
        res=0
        #For every order, the increments are weighted and summed
        #for the input i
        for(ord in 1:d){
          if(ord==1){
            idx<-which(indices[[ord+1]]==i)
            res=res+comb_weights[ord]*VEs[[ord+1]][as.vector(idx)]
          }else{
            #Columns of subsets of order ord+1 containing var_j
            idx_j<-which(indices[[ord+1]]==i, arr.ind=T)[,2]
            #Columns of subsets of order ord without j
            idx_woj<-which(apply(indices[[ord]]!=i, 2, all))
            #Total increment value for order ord
            tot_incr<-sum(VEs[[ord+1]][as.vector(idx_j)]) - sum(VEs[[ord]][as.vector(idx_woj)])
            res=res+comb_weights[ord]*tot_incr
          }
        }
        res
      }
    }else{
      #Parallelized randperm
      Shaps <- foreach::foreach(i=1:d, .combine=cbind)%dopar%{
        res=0
        for(pm in 1:n.perm){
          perm=perms[,pm]
          ord=which(perm==i)
          if(ord!=1){
            idx_j=which(colSums(matrix(apply(indices[[ord+1]], 2 , function(x) perm[1:ord] %in% x), ncol=ncol(indices[[ord+1]])))==ord)
            idx_woj=which(colSums(matrix(apply(indices[[ord]], 2 , function(x) perm[1:(ord-1)] %in% x), ncol=ncol(indices[[ord]])))==ord-1)
            res=res+VEs[[ord+1]][idx_j]-VEs[[ord]][idx_woj]
          }else{
            idx_j=which(indices[[ord+1]]==i)
            res=res+VEs[[ord+1]][idx_j]
          }
        }
        res
      }
    }

  }else{
    ################################
    #Unparallelized weight affectation
    Shaps<-rep(0,d)
    if(is.null(n.perm)){
      for(var_j in 1:d){
        #For every order, the increments are weighted and summed
        #for the input var_j
        for(ord in 1:d){
          if(ord==1){
            idx<-which(indices[[ord+1]]==var_j)
            Shaps[var_j]=Shaps[var_j]+comb_weights[ord]*VEs[[ord+1]][as.vector(idx)]
          }else{
            #Columns of subsets of order ord+1 containing var_j
            idx_j<-which(indices[[ord+1]]==var_j, arr.ind=T)[,2]
            #Columns of subsets of order ord without j
            idx_woj<-which(apply(indices[[ord]]!=var_j, 2, all))
            #Total increment value for order ord
            tot_incr<-sum(VEs[[ord+1]][as.vector(idx_j)]) - sum(VEs[[ord]][as.vector(idx_woj)])
            Shaps[var_j]=Shaps[var_j]+comb_weights[ord]*tot_incr
          }
        }
      }
      
    }else{
      #Unparallelized randperm
      for(var_j in 1:d){
        for(pm in 1:n.perm){
          perm=perms[,pm]
          ord=which(perm==var_j)
          if(ord!=1){
            idx_j=which(colSums(matrix(apply(indices[[ord+1]], 2 , function(x) perm[1:ord] %in% x), ncol=ncol(indices[[ord+1]])))==ord)
            idx_woj=which(colSums(matrix(apply(indices[[ord]], 2 , function(x) perm[1:(ord-1)] %in% x), ncol=ncol(indices[[ord]])))==ord-1)
            Shaps[var_j]=Shaps[var_j]+VEs[[ord+1]][idx_j]-VEs[[ord]][idx_woj]
          }else{
            idx_j=which(indices[[ord+1]]==var_j)
            Shaps[var_j]=Shaps[var_j]+VEs[[ord+1]][idx_j]
          }
        }
      }
    }

  }
  if(is.null(n.perm)){
    Shaps=Shaps/d
  }else{
    Shaps=Shaps/n.perm
  }

  
  if(boot){
    #If Bootstrap computation, return the Shapley Effects
    return(Shaps)
  }else{
    #For non-bootstrap computation, return conditional elements and Shapley Effects
    return(list("VE"=VEs,
                "Shap"=Shaps))
  }
}

shapleysobol_knn <- function(model=NULL, X, method="knn", n.knn = 2, n.limit = 2000, U=NULL, n.perm=NULL, noise=F, rescale=F, nboot = NULL, boot.level = 0.8, conf=0.95, parl=NULL, ...){
  ###########################################
  #Pre-processing
  d<-ncol(X)
  n<-nrow(X)
  
  ###########################################
  #Computing y
  if(!is.null(model)){
    y=model(X,...)
  }else{
    y=NULL
  }
  ##########################################
  #Arguments check
  
  #X
#  if(!(class(X)[1]=="matrix" | class(X)[1]=="data.frame")){
  if(!(inherits(X, "matrix") | inherits(X, "data.frame"))){
    stop("X must be either a matrix of a data frame.")
  }
  #Setting-up colnames
  if(is.null(colnames(X))){
    colnames(X)<-paste("X", 1:d, sep="")
  }
  #Method
  if(!is.element(method, c("rank", "knn"))){
    stop("method must either be rank or knn.")
  }
  
  if(!is.null(n.perm)){
    if(!is.numeric(n.perm)){
      stop("n.perm must be a positive integer.")
    }else if(n.perm>=factorial(d)){
      warning("The number of random permutations is greater than the total number of permutation. Defaulted to n.perm=NULL.")
      n.perm=NULL
    }else if(n.perm<=0){
      stop("n.perm must be a positive integer.")
    }
    else if(n.perm%%1!=0){
      warning("n.perm must be a positive integer. Defaulted to its floor value.")
      n.perm=floor(n.perm)
    }
  }
  #Extracting Sobols
  if(!is.null(U)){
#    if(!class(U)[1]%in%c("numeric","matrix","integer", "list")){
    if(!inherits(U, "numeric") && !inherits(U, "matrix") && !inherits(U, "integer") && typeof(U) != "list"){
      stop("U must be either 0 (Total Sobol' indices), 1 (First-Order indices), a list of subsets, or a matrix.")
#    }else if(class(U)[1]%in%c("integer","numeric")){
    }else if(inherits(U, "integer") || inherits(U,"numeric")){
      if(!U%in%c(0,1)){
        stop("U must be either 0 (Total Sobol' indices), 1 (First-Order indices), a list of subsets, or a matrix.")
      }
    } 
    if(inherits(U, "matrix")){
      if(ncol(U)!=d){
        stop("U must have exactly ncol(X) columns.")
      }
    }
  }
  #Bootstrap Checks
  if(!is.null(nboot)){
    if(!is.numeric(nboot)){
      stop("The 'nboot' argument must be a positive integer.")
    }else if (nboot%%1!=0 | nboot<0 | length(nboot)!=1){
      stop("The 'nboot' argument must be a positive integer.")
    }else if(nboot==0){
      boot=F
    }else{
      boot=T
    }
  }else{
    boot=F
  }

  if(!is.numeric(boot.level)){
    stop("The 'boot.level' argument must be a numeric argument strictly between 0 and 1.")
  }else if(boot.level<=0 | boot.level >=1){
    stop("The 'boot.level' argument must be a numeric argument strictly between 0 and 1.")
  }
  
  #parl
  if(!is.numeric(parl)){
    if(!is.null(parl)){
      stop("The 'parl' argument must be NULL or a positive integer.")
    }
  }else if(parl%%1!=0 | parl<0 | length(parl)!=1){
    stop("The 'parl' argument must be NULL or a positive integer.")
  }else if (parl>parallel::detectCores()){
    parl=parallel::detectCores()-1
    warning(paste("Too many cores specified. Defaulted to ", parl, " cores.", sep=""))
  }
  
  if(!is.null(y)){
    out<-compute.shapleysobol_knn(X,
                                y,
                                method,
                                n.knn ,
                                n.limit,
                                U,
                                n.perm,
                                noise,
                                rescale,
                                nboot,
                                boot.level,
                                conf,
                                parl,
                                boot)
    out$call=match.call()
  }else{
    out<-list("call"=match.call(),
              "Shap"=NULL,
              "VE"=NULL,
              "indices"=NULL,
              "method"=method,
              "n.perm"=n.perm,
              "w"=NULL,
              "conf_int"=NULL,
              "X"=X,
              "y"=y,
              "n.knn"=n.knn,
              "U"=U,
              "rescale"=rescale,
              "n.limit="=n.limit,
              "boot.level"=boot.level,
              "noise"=noise,
              "boot"=boot,
              "nboot"=nboot,
              "parl"=parl,
              "conf"=conf
    )
    class(out)<-"shapleysobol_knn"
  }
  return(out)
}


compute.shapleysobol_knn<-function(X, y, method, n.knn, n.limit, U, n.perm, noise, rescale, nboot, boot.level, conf, parl, boot){ 
  ###########################################
  #Pre-processing
  d<-ncol(X)
  n<-nrow(X)
  
  ########################################
  #Data Processing
  
  #Identifying categorical inputs
  is.cat<-sapply(1:d, function(x)"factor"%in%class(X[,x]))
  #Rescaling the inputs if specified
  if(rescale & ncol(X[,!is.cat])>1){
    X[,!is.cat]=rescale_inputs(as.matrix(X[,!is.cat]))
  }
  
  #########################################
  #Computing Shapleys
  if(is.null(U)){

  #List of subsets by order ([[2]]: subsets of order 1...etc)
  indices<-rep(list(0), d+1)
  #Weights vector, each element is the weight for |A| ([1]:|A|=0, [2]:|A|=1, ...)
  comb_weights<-rep(0,d)
  
  #Computing subsets and weights
  if(is.null(n.perm)){
    perms=NULL
    ##########################
    #All permutations are used
    for(j in 1:d){
      indices[[j+1]]<-t(gtools::combinations(n=d, r=j))
      comb_weights[j]<-1/choose(n=(d-1), j-1)
    }
  }else{
    comb_weights<-rep(0,d)
    perms=matrix(NA, nrow=d, ncol=n.perm)
    #####################################
    # n.perm random permutations are used
    for(j in 1:d){
      indices[[j+1]]<-matrix(NA, nrow=j, ncol=choose(n=d, j))
      comb_weights[j]<-1
    }
    for(pm in 1:n.perm){
      #We sample a permutation of {1,...,d}
      perm<-sample.int(d,d)
      perms[,pm]<-perm
      #We fill indices accordingly
      for(j in 1:d){
        # subset of order j from the selected random permutation perm
        subs<-perm[1:j]
        if(any(is.na(indices[[j+1]][,1]))){
          #If it's the first subset of order j in indices
          indices[[j+1]][,1]<-subs
        }else{
          chk.sub=colSums(matrix(apply(indices[[j+1]], 2, function(x) subs %in% x), ncol=ncol(indices[[j+1]])))==j
          if(!any(chk.sub)){
              #If the subset is not already in indices, we add it
              id.sub<-max(which(colSums(matrix(is.na(indices[[j+1]]), ncol=ncol(indices[[j+1]])))==0))+1
              indices[[j+1]][,id.sub]<-subs
          }
        }
      }
    }
    #Removing NAs
    for(j in 1:d){
      indices[[j+1]]<-matrix(indices[[j+1]][,stats::complete.cases(t(indices[[j+1]]))], nrow=j)
    }
  }


  
  ########################################
  #Shap KNN Computation
  
  #Preparing parallel cluster
  if(!is.null(parl)){
    cl=parallel::makeCluster(parl)
    doParallel::registerDoParallel(cl)
  }else{
    cl=NULL
  }

  res_Shap<-calc.shapknn(X=X,
                        y=y,
                        method=method,
                        n.perm=n.perm,
                        perms=perms,
                        d=d,
                        indices=indices,
                        comb_weights=comb_weights,
                        parl=parl,
                        clus=cl,
                        boot=F,
                        n.limit=n.limit,
                        n.knn=n.knn,
                        noise=noise)
  
  res_Shap$Shap<-matrix(res_Shap$Shap, ncol=1)
  colnames(res_Shap$Shap)<-"original"
  rownames(res_Shap$Shap)<-colnames(X)
  
  out<-list("call"=match.call(),
            "Shap"=res_Shap$Shap,
            "VE"=res_Shap$VE,
            "indices"=indices,
            "method"=method,
            "n.perm"=n.perm,
            "w"=comb_weights,
            "conf_int"=NULL,
            "X"=X,
            "y"=y,
            "n.knn"=n.knn,
            "rescale"=rescale,
            "n.limit="=n.limit,
            "boot.level"=boot.level,
            "noise"=noise,
            "boot"=boot,
            "nboot"=nboot,
            "parl"=parl,
            "conf"=conf
  )
                           
  if(boot){
    #TODO Mieux travailler le warning.
    warning("Bootstrap confidence intervals may not be theoretically sound.")
    boot.size<-round(n*boot.level)
    sample.boot <- function(data,mle){
      out <- data[sample.int(n=nrow(data), size=mle),,drop=FALSE]
      return(out)
    }
    
    if(!is.null(parl)){
      res.boot <- boot::boot(data=cbind(y,X), statistic=calc.shapknn, 
                             R = nboot, 
                             sim = "parametric", 
                             ran.gen = sample.boot, 
                             mle = boot.size,
                             cl=cl,
                             n.perm=n.perm,
                             perms=perms,
                             ncpus=parl,
                             parallel="snow",
                             y=y,
                             d=d,
                             method=method,
                             indices=indices,
                             comb_weights=comb_weights,
                             parl=NULL,
                             clus=NULL,
                             boot=T,
                             n.limit=n.limit,
                             n.knn=n.knn,
                             noise=noise)
    }else{
      res.boot <- boot::boot(data=cbind(y,X), statistic=calc.shapknn, 
                             R = nboot, 
                             sim = "parametric", 
                             ran.gen = sample.boot, 
                             mle = boot.size,
                             y=y,
                             d=d,
                             method=method,
                             n.perm=n.perm,
                             perms=perms,
                             indices=indices,
                             comb_weights=comb_weights,
                             parl=NULL,
                             clus=NULL,
                             boot=T,
                             n.limit=n.limit,
                             n.knn=n.knn,
                             noise=noise)
      
    }
    
    out$conf_int<-bootstats(res.boot, conf, "basic")
    rownames(out$conf_int)<-colnames(X)
  }
  
  #Stopping the cluster after parallelized computation
  if(!is.null(parl)){parallel::stopCluster(cl)}
  
  #Customize the class of the output for custom methods
  class(out)<-"shapleysobol_knn"
  return(out)
  }else{
    ###########################################
    #Extracting Sobols
    
    #Formatting indices
#    if(class(U)[1]%in%c("integer", "numeric")){
    if(inherits(U, "integer") || inherits(U, "numeric")){
      if(U==0){
        #Only Total Sobol indices
        indices<-rep(list(0),2)
        mat.indices<-matrix(NA, nrow=d-1, ncol=d)
        for(i in 1:d){
          mat.indices[,i]<-setdiff(1:d, i)
        }
        indices[[2]]<-mat.indices
        cname<-"Total Sobol"
      }else if(U==1){
        #Only First-Order Sobol indices
        indices<-rep(list(0),2)
        indices[[2]]<-t(matrix(1:d))
        cname<-"Sobol"
      }
      #Extracting variables names
      rnames<-colnames(X)
    }else{
      #Computing Sobols of subsets
      if(inherits(U, "matrix")){
        #If U is a matrix, transform it to a list of subsets
        U<-apply(U,1,function(x) which(as.logical(x)))
        if(typeof(U)!="list"){
          U<-as.list(U)
        }
      }
      #When U is a list of subsets
      nb.subsets<-length(U)
      #Finding the number of different orders
      ords<-rep(0, nb.subsets)
      for(i in 1:nb.subsets){
        ords[i]<-length(U[[i]])
      }
      un.orders<-unique(ords)
      nb.orders<-length(un.orders)
      indices<-rep(list(0), nb.orders+1)
      #Putting the subsets in indices
      for(i in 1:nb.orders){
        curr.ord<-un.orders[i]
        curr.n.ord<-sum(ords==curr.ord)
        indices[[i+1]]<-matrix(NA, nrow=curr.ord, ncol=curr.n.ord)
        for(j in 1:length(which(ords==curr.ord))){
          sub<-which(ords==curr.ord)[j]
          indices[[i+1]][,j]<-U[[sub]]
        }
      }
      rnames<-unlist(sapply(indices[-1], function(x){
        res<-apply(x, 2, function(y) paste(colnames(X)[y], collapse="-"))
        return(res)
      }))
      cname<-"Closed Sobol"
    }  
    #From indices, extract Sobols
    if(!is.null(parl)){
      #If paralel, setting up the cluster
      cl=parallel::makeCluster(parl)
      doParallel::registerDoParallel(cl)
    }else{
      cl=NULL
    }
    S<-calc.shapknn(X=X, 
                 y=y, 
                 method=method, 
                 n.perm=n.perm, 
                 perms=NULL, 
                 d=d, 
                 indices=indices, 
                 comb_weights=NULL, 
                 parl=parl, 
                 clus=cl, 
                 boot=F, 
                 n.limit=n.limit, 
                 n.knn=n.knn, 
                 noise=T, 
                 i=1:nrow(X), 
                 get.sob=T)
    if(inherits(U, "integer") || inherits(U, "numeric")){
      if(U==0){
        S<-1-S
      }
    }
    S<-matrix(S, ncol=1)
    rownames(S)<-rnames
    colnames(S)<-cname
    out<-list("call"=match.call(),
              "Sobol"=S,
              "indices"=indices[-1],
              "method"=method,
              "conf_int"=NULL,
              "X"=X,
              "y"=y,
              "U"=U,
              "n.knn"=n.knn,
              "rescale"=rescale,
              "n.limit="=n.limit,
              "boot.level"=boot.level,
              "noise"=noise,
              "boot"=boot,
              "nboot"=nboot,
              "parl"=parl,
              "conf"=conf
    )
    class(out)<-"sobol_knn"
    
    if(boot){
      #TODO Mieux travailler le warning.
      warning("Bootstrap confidence intervals may not be theoretically sound.")
      boot.size<-round(n*boot.level)
      sample.boot <- function(data,mle){
        out <- data[sample.int(n=nrow(data), size=mle),,drop=FALSE]
        return(out)
      }
      
      if(!is.null(parl)){
        res.boot <- boot::boot(data=cbind(y,X), statistic=calc.shapknn, 
                               R = nboot, 
                               sim = "parametric", 
                               ran.gen = sample.boot, 
                               mle = boot.size,
                               cl=cl,
                               n.perm=NULL,
                               perms=NULL,
                               ncpus=parl,
                               parallel="snow",
                               y=y,
                               d=d,
                               method=method,
                               indices=indices,
                               comb_weights=NULL,
                               parl=NULL,
                               clus=NULL,
                               boot=T,
                               n.limit=n.limit,
                               n.knn=n.knn,
                               noise=T,
                               get.sob=T)
      }else{
        res.boot <- boot::boot(data=cbind(y,X), statistic=calc.shapknn, 
                               R = nboot, 
                               sim = "parametric", 
                               ran.gen = sample.boot, 
                               mle = boot.size,
                               y=y,
                               d=d,
                               method=method,
                               n.perm=NULL,
                               perms=NULL,
                               indices=indices,
                               comb_weights=NULL,
                               parl=NULL,
                               clus=NULL,
                               boot=T,
                               n.limit=n.limit,
                               n.knn=n.knn,
                               noise=T,
                               get.sob=T)
        
      }
      
      #Bootstraped results for Total indices
      out$conf_int<-bootstats(res.boot, conf, "basic")
      if(inherits(U, "integer") || inherits(U, "numeric")){
        if(U==0){
          out$conf_int[,c(1,4,5)]<-1-out$conf_int[,c(1,4,5)]
          #Switching min/max CI names
          colnames(out$conf_int)<-colnames(out$conf_int)[c(1,2,3,5,4)]
          out$conf_int<-out$conf_int[,c(1,2,3,5,4)]
        }
      }
      rownames(out$conf_int)<-rnames
    }
    #If parallel, stopping the cluster
    if(!is.null(parl)){parallel::stopCluster(cl)}
    return(out)
  }
}


#######################################
# Custom methods

print.shapleysobol_knn<-function(x, ...){
  cat("\nCall:\n", deparse(x$call), "\n", sep = "")
  if(x$method=="knn"){
    cat("\nShapley effects estimation by nearest-neighbor procedure\n")
  }else if(x$method=="rank"){
    cat("\nShapley effects estimation by ranking procedure\n")
  }


  if(x$boot){
    print(x$conf_int)
  }else{
    print(x$Shap)
  }
}


plot.shapleysobol_knn<-function(x, ylim=c(0,1), ...){
  if(x$method=="knn"){
    plot_title="Shapley effects estimation by nearest-neighbor procedure"
  }else if(x$method=="rank"){
    plot_title="Shapley effects estimation by ranking procedure"
  }
  
  plot(x$Shap,
       ylim=ylim,
       axes=F,
       xlab="Inputs",
       ylab="Shapley effects",
       main=plot_title,
       ...)
  axis(2)
  axis(1, at=seq_along(x$Shap), labels=colnames(x$X))
  box()
  graphics::grid()
  
  if(x$boot){
    segments(x0=1:ncol(x$X),
             y0=x$conf_int$`min. c.i.`,
             x1=1:ncol(x$X),
             y1=x$conf_int$`max. c.i.`)
    legend("topright",
           pch=c(1,NA),
           lty=c(NA,1),
           legend=c("Shapley effects",
                    paste(x$conf*100, "% Confidence interval", sep="")),
           bg="white")
  }else{
    legend("topright",
           pch=1,
           legend=c("Shapley effects"),
           bg="white")
  }
}

tell.shapleysobol_knn<-function(x,y,...){
  
  id.obj<-deparse(substitute(x))
  if (! is.null(y)) {
    x$y <- y
  }else if (is.null(x$y)) {
    stop("y not found.")
  }
  
  res<-compute.shapleysobol_knn(X=x$X,
                              y=x$y,
                              method=x$method,
                              n.knn=x$n.knn ,
                              n.limit=x$n.limit,
                              U=x$U,
                              n.perm=x$n.perm,
                              noise=x$noise,
                              rescale=x$rescale,
                              nboot=x$nboot,
                              boot.level=x$boot.level,
                              conf=x$conf,
                              parl=x$parl,
                              boot=x$boot)
  assign(id.obj, res, parent.frame())
  
  return(res)
}

extract.shapleysobol_knn<- function(x, ...){
  if (is.null(x$VE) | is.null(x$Shap)){
    stop("The Shapley effects must be computed (U=NULL) prior to extracting first-order and total Sobol' indices.")
  }
  d<-ncol(x$X)
  Sob<-as.matrix(x$VE[[2]], ncol=1)
  Sob.tot<-1-as.matrix(rev(x$VE[[d]]), ncol=1)
  colnames(Sob)<-"Sobol"
  rownames(Sob)<-rownames(x$Shap)
  colnames(Sob.tot)<-"Total Sobol"
  rownames(Sob.tot)<-rownames(x$Shap)
  return(list("Sobol"=Sob,
              "Sobol.tot"=Sob.tot))
}

#Plotting the Sobol' indices
plot.sobol_knn<-function(x, ylim=c(0,1), ...){
  if(inherits(x$U, "integer") || inherits(x$U, "numeric")){
    if(x$U==0){
      if(x$method=="knn"){
        plot_title=("\nTotal Sobol' indices estimation by nearest-neighbor procedure\n")
      }else if(x$method=="rank"){
        plot_title=("\nTotal Sobol' indices estimation by ranking procedure\n")
      }
      
    }else if(x$U==1){
      if(x$method=="knn"){
        plot_title=("\nFirst order Sobol' indices estimation by nearest-neighbor procedure\n")
      }else if(x$method=="rank"){
        plot_title=("\nFirst order Sobol' indices estimation by ranking procedure\n")
      }
    }
  }else{
    if(x$method=="knn"){
      plot_title=("\nClosed Sobol' indices estimation by nearest-neighbor procedure\n")
    }else if(x$method=="rank"){
      plot_title=("\nClosed Sobol' indices estimation by ranking procedure\n")
    }
  }
  
  plot(x$Sobol,
       ylim=ylim,
       axes=F,
       xlab="Inputs/Subsets",
       ylab="Sobol' indices",
       main=plot_title,
       ...)
  axis(2)
  axis(1, at=seq_along(x$Sobol), labels=rownames(x$Sobol))
  box()
  graphics::grid()
  
  if(x$boot){
    segments(x0=1:ncol(x$X),
             y0=x$conf_int$`min. c.i.`,
             x1=1:ncol(x$X),
             y1=x$conf_int$`max. c.i.`)
    legend("topright",
           pch=c(1,NA),
           lty=c(NA,1),
           legend=c("Sobol' indices",
                    paste(x$conf*100, "% Confidence interval", sep="")),
           bg="white")
  }else{
    legend("topright",
           pch=1,
           legend=c("Sobol' indices"),
           bg="white")
  }
}

#Printing the Sobol' indices
print.sobol_knn<-function(x, ...){
  cat("\nCall:\n", deparse(x$call), "\n", sep = "")
  if(inherits(x$U, "integer") || inherits(x$U, "numeric")){
    if(x$U==0){
      if(x$method=="knn"){
        cat("\n(independent) Total Sobol' indices estimation by nearest-neighbor\n")
      }else if(x$method=="rank"){
        cat("\n(independent) Total Sobol' indices estimation by ranking\n")
      }
      
    }else if(x$U==1){
      if(x$method=="knn"){
        cat("\n(full) First order Sobol' indices estimation by nearest-neighbor\n")
      }else if(x$method=="rank"){
        cat("\n(full) First order Sobol' indices estimation by ranking\n")
      }
    }
  }else{
    if(x$method=="knn"){
      cat("\nClosed Sobol' indices estimation by nearest-neighbor\n")
    }else if(x$method=="rank"){
      cat("\nClosed Sobol' indices estimation by ranking\n")
    }
  }
  
  if(x$boot){
    print(x$conf_int)
  }else{
    print(x$Sobol)
  }
}

ggplot.shapleysobol_knn<-function(data, mapping = aes(), ylim = c(0,1), ..., environment = parent.frame()) {
  x <- data
  if (!is.null(x$Shap)){
    shap <- as.data.frame(x$Shap) 
    colnames(shap) <- "original"
    nodeggplot(list(shap), xname = "Shapley effects", ylim = ylim)
  }else{
    stop("No plot available")
  }
}

ggplot.sobol_knn<-function(data, mapping = aes(), ylim = c(0,1), ..., environment = parent.frame()){
  x <- data
  if(!is.null(x$Sobol)){
    sobols<-as.data.frame(x$Sobol)
    colnames(sobols)<-"original"
    if(typeof(x$U)=="list"){
      x_name<-"Closed Sobol'"
    }else{
      if(x$U==1){
        x_name<-"(full) First Order Sobol'"
      }else if (x$U==0){
        x_name<-"(independent) Total Sobol'"
      }
    }
    nodeggplot(list(sobols), xname=x_name, ylim=ylim)
  }else{
    stop("No plot available")
  }

}

Try the sensitivity package in your browser

Any scripts or data that you put into this service are public.

sensitivity documentation built on Sept. 11, 2024, 9:09 p.m.