R/pme.R
In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures

Documented in ggplot.pme_knn plot.pme_knn pme_knn print.pme_knn tell.pme_knn

############################################
# Needed packages
# library(parallel)
# library(doParallel)
# library(foreach)
# library(gtools)
# library(boot)
# library(RANN)
# library(whitening)
# library(TSP)

#########################
#Input rescaling function
rescale_inputs <- function(X){
  # Mahalanobis whitening, new X has unit diagonal covariance matrix
  X <- whitening::whiten(X, method="ZCA-cor")
  # We then apply a copula transform
  X <- apply(X,2,rank)/nrow(X)
  return(X)
}


############################################
# Var(E[Y|X_A])/Var(Y) estimation by Rank/TSP
estim.VE.rank<-function(dataX, y, subset, n){
  #Subsetting X
  X<-dataX[, subset, drop=F]
  any.cat<-any(sapply(X,class)=="factor")
  p=ncol(X)
  y=c(y)
  
  if(any.cat){
    id.cat=which(sapply(X,class)=="factor")
    id.cont <- setdiff(1:p,id.cat)
    Xtemp <- as.matrix(X[,id.cont,drop=FALSE])
    for (i in 1:length(id.cat)){
      X.factor<-X[,id.cat[i], drop=F]
      df.temp <- X[,id.cat[i],drop=FALSE]; colnames(df.temp) <- "X"
      # Normalization to ensure that two different rows will have unit euclidean distance
      xx.temp <- model.matrix(~X-1,df.temp)*sqrt(2)/2
      Xtemp<-cbind(Xtemp, xx.temp)
    }
    X<-Xtemp
  }
  
  # Use ranking or TSP
  if (ncol(X)>1){
    # Use TSP
    dist.mat <- as.matrix(dist(X))
    tsp <- TSP::TSP(dist.mat)
    tour <- as.integer(TSP::solve_TSP(tsp))
    id.sort <- cbind(tour,tour[c(2:n,1)])
  }else{
    # Use ranking
    id.sort <- sort(X[,1], index.return = TRUE)$ix
    id.sort <- matrix(c(id.sort,id.sort[c(2:n,1)]),ncol=2)
  }
  
  var.cond<-apply(matrix(y[id.sort],ncol=2),1,var)
  
  #Returning an estimate of Var(E[Y|X_A])/Var(Y)
  VE_A<-1-mean(var.cond)/var(y)
  return(VE_A)
}

############################################
# Var(E[Y|X_A])/Var(Y) estimation by KNN
estim.VE.knn<-function(dataX, y, subset, n, n.knn, n.limit){
  #Subsetting X
  X<-dataX[, subset, drop=F]
  any.cat<-any(sapply(X,class)=="factor")
  p=ncol(X)
  y=c(y)
  
  #One Hot Encoding of categorical variables
  if(any.cat){
    id.cat=which(sapply(X,class)=="factor")
    id.cont <- setdiff(1:p,id.cat)
    Xtemp <- as.matrix(X[,id.cont,drop=FALSE])
    for (i in 1:length(id.cat)){
      X.factor<-X[,id.cat[i], drop=F]
      df.temp <- X[,id.cat[i],drop=FALSE]; colnames(df.temp) <- "X"
      # Normalization to ensure that two different rows will have unit euclidean distance
      xx.temp <- model.matrix(~X-1,df.temp)*sqrt(2)/2
      Xtemp<-cbind(Xtemp, xx.temp)
    }
    X<-Xtemp
  }
  if(n<=n.limit){
    ###############################################
    # When n is reasonable, compute exact distances
    
    #Computing distance matrix
    dist.mat<-as.matrix(dist(X))
    #Getting the n.knn closest points for each datapoint
    id.sort <- t(apply(dist.mat,2,order)[1:n.knn,])
  }else{
    ###############################################
    # When n is too large, compute approximated NN
    
    #Getting the approximated n.knn closest points for each datapoint
    id.sort <- RANN::nn2(X,k=n.knn)$nn.idx
  }
  #Estimating Var(Y|X_A)
  var.cond<-apply(matrix(y[id.sort],ncol=n.knn),1,var)
  
  #Returning an estimate of Var(E[Y|X_A])/Var(Y)
  VE_A<-1-mean(var.cond)/var(y)
  return(VE_A)
}

#Returns the Total Sobol' indices from the closed Sobol' indices
get.SobolT<-function(VEs){
  d<-length(VEs)-1
  SobT<-rep(list(0), length(VEs))
  for (ord in 1:d){
    SobT[[ord+1]] = rev(VEs[[d+1]] - VEs[[d+1-ord]])
  }
  return(SobT)
}


#Recursive PV estimation
recur.PV<-function(indices, VEs, parl, cl){
  
  if(!is.null(parl)){doParallel::registerDoParallel(cl)}
  d=length(indices)-1
  Ps<-rep(list(0), d)
  Ps[[1]]=VEs[[2]]
  for(ord in 2:d){
    Ws<-VEs[[ord+1]]
    nbset<-ncol(indices[[ord+1]])#Number of subsets S
    if(!is.null(parl)){
      Ps[[ord]]<-foreach::foreach(i=1:nbset, .combine=cbind)%dopar%{
        S<-c(indices[[ord+1]][,i])
        idx_Spi=which(colSums(matrix(indices[[ord]]%in%S, nrow=length(S)-1, ncol=ncol(indices[[ord]])))==length(S)-1)
        res=Ws[i]/sum(1/(Ps[[ord-1]][idx_Spi]))
        res
      }
    }else{
      Ps[[ord]]<-rep(0, nbset)#Setting up the Ps results
      for(i in 1:nbset){
        S<-c(indices[[ord+1]][,i]) #For every possible subset S of order ord
        idx_Spi=which(colSums(matrix(indices[[ord]]%in%S, nrow=length(S)-1, ncol=ncol(indices[[ord]])))==length(S)-1)#Find every S\{i} for all i
        Ps[[ord]][i]=Ws[i]/sum(1/(Ps[[ord-1]][idx_Spi]))#Recursively compute Ps
      }
    }
  }
  
  return(Ps)
}


########################
#Estimation of all the PVs

calc.pv<-function(X, y, method, d, indices, parl, clus, boot, n.limit, n.knn, noise, tol, marg, i=1:nrow(X)){
                           
  ###############################
  #Pre-Processing
  #Row selection (for Bootstrap estimate, default doesn't change anything)
  if(boot){
    y<-X[,1]
    X<-X[,-1]
  }
  n<-nrow(X)
  
  #List of VEs, arranged by orders (same structure as the indices object)
  VEs<-rep(list(0), d+1)
  
  ####################################
  #Conditional elements estimation
  #For every order, the corresponding VE estimate of each
  #subset of indices are stored in VEs
  for(j in 1:d){
    if(method=='knn'){
      #############################################
      # KNN Computation of the conditional elements
      if(!is.null(parl)){
        #############################
        #Parallelized VE estimation
        VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.knn,
                                       dataX=X,
                                       y=y,
                                       n=n,
                                       n.knn=n.knn,
                                       n.limit=n.limit)
        
      }else{
        #############################
        # VE estimation
        VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.knn,
                          dataX=X,
                          y=y,
                          n=n,
                          n.knn=n.knn,
                          n.limit=n.limit)
      }
    }else if(method=='rank'){
      ##################################################
      # Rank/TSP Computation of the conditional elements
      if(!is.null(parl)){
        #############################
        #Parallelized VE estimation
        VEs[[j+1]]<-parallel::parApply(clus,indices[[j+1]], 2,estim.VE.rank,
                                       dataX=X,
                                       y=y,
                                       n=n)
        
      }else{
        #############################
        # VE estimation
        VEs[[j+1]]<-apply(indices[[j+1]], 2, estim.VE.rank,
                          dataX=X,
                          y=y,
                          n=n)
      }
    }
    
  }
  #Allowing the effects not to sum up to one, but to V(E[Y|X])
  if(noise){
    #If noise == T, then we estimate V(E[Y|X])
    if(method=="knn"){
      VEX<-estim.VE.knn(dataX=X,
                        y=y,
                        subset=indices[[d+1]],
                        n=n,
                        n.knn=n.knn,
                        n.limit=n.limit)
      }else if (method=="rank"){
        VEX<-estim.VE.rank(dataX=X,
                          y=y,
                          subset=indices[[d+1]],
                          n=n)
      }
      VEs[[d+1]]=VEX
    }else{
    #If noise == F, V(E[Y|X]) is equal to 1
    VEs[[d+1]]=1
  }
  
  #If PME computation, then change the closed Sobol' indices to the Total Sobol' indices (their marginal)
  if(marg){
    VEs<-get.SobolT(VEs)
  }
  #################################################
  #PV Computation
  
  #Identifying inputs with 0 Total Sobol (zero players)
  if(!is.null(tol)){
    idx.z<-indices[[2]][,which(abs(VEs[[2]])<=tol)]
  }else{
    idx.z<-indices[[2]][,which(VEs[[2]]==0)]
  }
  if(any(idx.z)){
    #Checking if there are other zero (or under tol) coalitions in each order
    if(!is.null(tol)){
      Z.coal.order<-which(sapply(VEs, function(x) any(abs(x)<=tol)))
      #Which coalitions are null coalitions, may be useful someday
      #Z.coal.idx<-sapply(a.coal.order, function(x) return(indices[[x]][,which(VEs[[x]]<=tol),drop=FALSE]), simplify=F)
    }else{
      Z.coal.order<-which(sapply(VEs, function(x) any(x==0)))
      #Which coalitions are null coalitions, may be useful someday
      #Z.coal.idx<-sapply(a.coal.order, function(x) return(indices[[x]][,which(VEs[[x]]<=tol),drop=FALSE]), simplify=F)
    }
    #Getting the cardinality of the biggest zero (or under tol) coalition
    Z.cardMax<-max(Z.coal.order)-1 #It should always be over or equal to 1 since there are zero players
    #Biggest zero coalitions of max cardinality
    if(!is.null(tol)){
      Z.coal.cardMax<-indices[[Z.cardMax+1]][,which(abs(VEs[[Z.cardMax+1]])<=tol), drop=F]
    }else{
      Z.coal.cardMax<-indices[[Z.cardMax+1]][,which(VEs[[Z.cardMax+1]]==0), drop=F]
    }
    #Boolean of size card(idx.z) indicating if they are in all the zero coalitions of max cardinality
    z.zeroPV<-sapply(idx.z, function(x) sum(colSums(Z.coal.cardMax==x))==1)
    #First case: Z.cardMax=d, no one gets anything (should not happen too often)
    if(Z.cardMax==d){
      PV=rep(0,d)
      PV=matrix(PV, ncol=1)
    }else if(Z.cardMax==d-1){
      #Second case: Z.cardMax=d-1, no need to loop
      PV=rep(0,d)
      idx.pv=setdiff(1:d,idx.z[z.zeroPV])
      PV[idx.pv] = VEs[[d+1]]/nrow(Z.coal.cardMax)
      PV=matrix(PV, ncol=1)
    }else{
      #Third case: Recursive computing
      PS.i<-rep(0,d)
      PS.N<-rep(0,d)
      #Setting up VEs and indices by removing every zero coalition
      for(idx.Zcoal in 1:ncol(Z.coal.cardMax)){
        Z.coal=Z.coal.cardMax[,idx.Zcoal]
        indices_<-rep(list(0), d-Z.cardMax+1)
        VEs_<-rep(list(0), d-Z.cardMax+1)
        for(i in 1:(d-Z.cardMax)){
          checkmat<-matrix(is.element(indices[[i+1]], Z.coal), i, ncol(indices[[i+1]]))
          idx_ind_null <- as.vector(which(colSums(checkmat) == 0))
          ind_tmp<-indices[[i+1]][, idx_ind_null, drop=F]
          indices_[[i+1]]<-rbind(ind_tmp, matrix(rep(Z.coal, ncol(ind_tmp)), ncol=ncol(ind_tmp), nrow=length(Z.coal)))
        }
        for(i in 1:(length(indices_)-1)){
          idx.get<-apply(indices_[[i+1]],
                         2,
                         function(x) which(colSums(matrix(indices[[Z.cardMax+i+1]] %in% x,
                                                          nrow=Z.cardMax+i, 
                                                          ncol=ncol(indices[[Z.cardMax+i+1]])))==i+Z.cardMax)
                         )
          
          VEs_[[i+1]]<-VEs[[Z.cardMax+i+1]][idx.get]
        }
        PS<-recur.PV(indices=indices_,
                     VEs=VEs_, 
                     parl=parl,
                     cl=clus)
        idx.var<-as.vector(indices_[[2]][1,])
        PS.i[idx.var]=PS.i[idx.var]+(1/rev(PS[[length(PS)-1]]))
        PS.N=PS.N+1/PS[[length(PS)]]
      }
      PV=matrix(PS.i/PS.N, ncol=1)
    }

  }else{
    PS<-recur.PV(indices=indices,
                 VEs=VEs,
                 parl=parl, 
                 cl=clus)
    PV<-matrix(rev(PS[[d]]/PS[[d-1]]), ncol=1)
  }
  
  if(!boot){
    return(list("PME"=PV,
                "VE"=VEs))
  }else{
    return(as.vector(PV))
  }

}


pme_knn <- function(model=NULL, X, method="knn", tol=NULL, marg=T, n.knn = 2, n.limit = 2000, noise=F, rescale=F, nboot = NULL, boot.level = 0.8, conf=0.95, parl=NULL, ...){
  ###########################################
  #Pre-processing
  d<-ncol(X)
  n<-nrow(X)
  
  ###########################################
  #Computing y
  if(!is.null(model)){
    y=model(X,...)
  }else{
    y=NULL
  }
  ##########################################
  #Arguments check
  
  #X
  if(!(class(X)[1]=="matrix" | class(X)[1]=="data.frame")){
    stop("X must be either a matrix of a data frame.")
  }
  #Setting-up colnames
  if(is.null(colnames(X))){
    colnames(X)<-paste("X", 1:d, sep="")
  }
  #Method
  if(!is.element(method, c("rank", "knn"))){
    stop("method must either be rank or knn.")
  }
  
  #Bootstrap Checks
  if(!is.null(nboot)){
    if(!is.numeric(nboot)){
      stop("The 'nboot' argument must be a positive integer.")
    }else if (nboot%%1!=0 | nboot<0 | length(nboot)!=1){
      stop("The 'nboot' argument must be a positive integer.")
    }else if(nboot==0){
      boot=F
    }else{
      boot=T
    }
  }else{
    boot=F
  }
  
  if(!is.numeric(boot.level)){
    stop("The 'boot.level' argument must be a numeric argument strictly between 0 and 1.")
  }else if(boot.level<=0 | boot.level >=1){
    stop("The 'boot.level' argument must be a numeric argument strictly between 0 and 1.")
  }
  
  #parl
  if(!is.numeric(parl)){
    if(!is.null(parl)){
      stop("The 'parl' argument must be NULL or a positive integer.")
    }
  }else if(parl%%1!=0 | parl<0 | length(parl)!=1){
    stop("The 'parl' argument must be NULL or a positive integer.")
  }else if (parl>parallel::detectCores()){
    parl=parallel::detectCores()-1
    warning(paste("Too many cores specified. Defaulted to ", parl, " cores.", sep=""))
  }
  
  if(!is.null(y)){
    out<-compute.pme_knn(X=X,
                         y=y,
                         method=method,
                         tol=tol,
                         marg=marg,
                         n.knn=n.knn,
                         n.limit=n.limit,
                         noise=noise,
                         rescale=rescale,
                         nboot=nboot,
                         boot.level=boot.level,
                         conf=conf,
                         parl=parl,
                         boot=boot)
    
    out$call=match.call()
  }else{
    out<-list("call"=match.call(),
              "PME"=NULL,
              "VE"=NULL,
              "indices"=NULL,
              "method"=method,
              "conf_int"=NULL,
              "X"=X,
              "y"=y,
              "n.knn"=n.knn,
              "rescale"=rescale,
              "n.limit="=n.limit,
              "boot.level"=boot.level,
              "noise"=noise,
              "boot"=boot,
              "nboot"=nboot,
              "parl"=parl,
              "conf"=conf,
              "marg"=marg,
              "tol="=tol
    )
    class(out)<-"pme_knn"
  }
  return(out)
}



compute.pme_knn<-function(X, y, method, tol, marg, n.knn, n.limit, U, noise, rescale, nboot, boot.level, conf, parl, boot){ 
  ###########################################
  #Pre-processing
  d<-ncol(X)
  n<-nrow(X)
  
  ########################################
  #Data Processing
  
  #Identifying categorical inputs
  is.cat<-sapply(1:d, function(x)"factor"%in%class(X[,x]))
  #Rescaling the inputs if specified
  if(rescale & ncol(X[,!is.cat])>1){
    X[,!is.cat]=rescale_inputs(as.matrix(X[,!is.cat]))
  }
  
  #########################################
  #Computing PME
  #List of subsets by order ([[2]]: subsets of order 1...etc)
  indices<-rep(list(0), d+1)
  #Weights vector, each element is the weight for |A| ([1]:|A|=0, [2]:|A|=1, ...)
    
  #Computing subsets and weights
  ##########################
  #All permutations are used
  for(j in 1:d){
    indices[[j+1]]<-t(gtools::combinations(n=d, r=j))
  }

  ########################################
  #PME KNN Computation
    
  #Preparing parallel cluster
  if(!is.null(parl)){
    cl=parallel::makeCluster(parl)
    doParallel::registerDoParallel(cl)
  }else{
    cl=NULL
  }
    
  res_PME<-calc.pv(X=X,
                      y=y,
                      method=method,
                      d=d,
                      indices=indices,
                      parl=parl,
                      clus=cl,
                      boot=F,
                      n.limit=n.limit,
                      n.knn=n.knn,
                      noise=noise,
                      marg=marg,
                   tol=tol)
    
    colnames(res_PME$PME)<-"original"
    rownames(res_PME$PME)<-colnames(X)
    
    out<-list("call"=match.call(),
              "PME"=res_PME$PME,
              "VE"=res_PME$VE,
              "indices"=indices,
              "method"=method,
              "conf_int"=NULL,
              "X"=X,
              "y"=y,
              "n.knn"=n.knn,
              "rescale"=rescale,
              "n.limit="=n.limit,
              "boot.level"=boot.level,
              "noise"=noise,
              "boot"=boot,
              "nboot"=nboot,
              "parl"=parl,
              "conf"=conf,
              "marg"=marg,
              "tol"=tol
    )
    
    if(boot){
      #TODO Mieux travailler le warning.
      warning("Bootstrap confidence intervals may not be theoretically sound.")
      boot.size<-round(n*boot.level)
      sample.boot <- function(data,mle){
        out <- data[sample.int(n=nrow(data), size=mle),,drop=FALSE]
        return(out)
      }
      
      if(!is.null(parl)){
        res.boot <- boot::boot(data=cbind(y,X), statistic=calc.pv, 
                               R = nboot, 
                               sim = "parametric", 
                               ran.gen = sample.boot, 
                               mle = boot.size,
                               cl=cl,
                               ncpus=parl,
                               parallel="snow",
                               y=y,
                               d=d,
                               method=method,
                               indices=indices,
                               parl=NULL,
                               clus=NULL,
                               boot=T,
                               n.limit=n.limit,
                               n.knn=n.knn,
                               noise=noise,
                               marg=marg,
                               tol=tol)
      }else{
        res.boot <- boot::boot(data=cbind(y,X), statistic=calc.pv, 
                               R = nboot, 
                               sim = "parametric", 
                               ran.gen = sample.boot, 
                               mle = boot.size,
                               y=y,
                               d=d,
                               method=method,
                               indices=indices,
                               parl=NULL,
                               clus=NULL,
                               boot=T,
                               n.limit=n.limit,
                               n.knn=n.knn,
                               noise=noise,
                               marg=marg,
                               tol=tol)
      }
      
      out$conf_int<-bootstats(res.boot, conf, "basic")
      rownames(out$conf_int)<-colnames(X)
    }
    
    #Stopping the cluster after parallelized computation
    if(!is.null(parl)){parallel::stopCluster(cl)}
    
    #Customize the class of the output for custom methods
    class(out)<-"pme_knn"
    return(out)
}


#######################################
# Custom methods

print.pme_knn<-function(x, ...){
  cat("\nCall:\n", deparse(x$call), "\n", sep = "")
  if(x$method=="knn"){
    cat("\nProportional marginal effects estimation by nearest-neighbor procedure\n")
  }else if(x$method=="rank"){
    cat("\nProportional marginal effects estimation by ranking procedure\n")
  }
  
  if(x$boot){
    print(x$conf_int)
  }else{
    print(x$PME)
  }
}


plot.pme_knn<-function(x, ylim=c(0,1), ...){
  if(x$method=="knn"){
    plot_title="Proportional marginal effects estimation by nearest-neighbor procedure"
  }else if(x$method=="rank"){
    plot_title="Proportional marginal effects estimation by ranking procedure"
  }
  
  plot(x$PME,
       ylim=ylim,
       axes=F,
       xlab="Inputs",
       ylab="Proportional marginal effects",
       main=plot_title,
       ...)
  axis(2)
  axis(1, at=seq_along(x$PME), labels=colnames(x$X))
  box()
  graphics::grid()
  
  if(x$boot){
    segments(x0=1:ncol(x$X),
             y0=x$conf_int$`min. c.i.`,
             x1=1:ncol(x$X),
             y1=x$conf_int$`max. c.i.`)
    legend("topright",
           pch=c(1,NA),
           lty=c(NA,1),
           legend=c("Proportional marginal effects",
                    paste(x$conf*100, "% Confidence interval", sep="")),
           bg="white")
  }else{
    legend("topright",
           pch=1,
           legend=c("Proportional marginal effects"),
           bg="white")
  }
}

ggplot.pme_knn<-function(data, mapping = aes(), ylim = c(0,1), ..., environment = parent.frame()) {
  x <- data
  if (!is.null(x$PME)){
    pme <- as.data.frame(x$PME) 
    colnames(pme) <- "original"
    nodeggplot(list(pme), xname = "Proportional marginal effects", ylim = ylim)
  }else{
    stop("No plot available")
  }
}

tell.pme_knn<-function(x,y,...){
  
  id.obj<-deparse(substitute(x))
  if (! is.null(y)) {
    x$y <- y
  }else if (is.null(x$y)) {
    stop("y not found.")
  }
  
  res<-compute.pme_knn(X=x$X,
                                y=x$y,
                                method=x$method,
                                n.knn=x$n.knn ,
                                n.limit=x$n.limit,
                                noise=x$noise,
                                rescale=x$rescale,
                                nboot=x$nboot,
                                boot.level=x$boot.level,
                                conf=x$conf,
                                parl=x$parl,
                                boot=x$boot,
                      marg=x$marg,
                      tol=x$tol)
  assign(id.obj, res, parent.frame())
  
  return(res)
}
Any scripts or data that you put into this service are public.
sensitivity documentation built on Sept. 11, 2024, 9:09 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
sensitivity
Global Sensitivity Analysis of Model Outputs and Importance Measures

R/pme.R
In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures

Defines functions tell.pme_knn ggplot.pme_knn plot.pme_knn print.pme_knn compute.pme_knn pme_knn calc.pv recur.PV get.SobolT estim.VE.knn estim.VE.rank rescale_inputs

Documented in ggplot.pme_knn plot.pme_knn pme_knn print.pme_knn tell.pme_knn

Try the sensitivity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sensitivity Global Sensitivity Analysis of Model Outputs and Importance Measures

R/pme.R In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures

Defines functions tell.pme_knn ggplot.pme_knn plot.pme_knn print.pme_knn compute.pme_knn pme_knn calc.pv recur.PV get.SobolT estim.VE.knn estim.VE.rank rescale_inputs

Documented in ggplot.pme_knn plot.pme_knn pme_knn print.pme_knn tell.pme_knn

Try the sensitivity package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

sensitivity
Global Sensitivity Analysis of Model Outputs and Importance Measures

R/pme.R
In sensitivity: Global Sensitivity Analysis of Model Outputs and Importance Measures