R/calplot_241224.R
In glmnetr: Nested Cross Validation for the Relaxed Lasso and Other Machine Learning Models

Documented in calplot

################################################################################
 
#  library(glmnetr)
#  library(splines)
#  library(survival)
 
################################################################################
################################################################################

#' Construct a single calibration plot for a nested.glmnetr output object 
#'
#' @param object1 A nested.glmnetr() output object for calibration
#' @param wbeta  Which Beta should be plotted, an integer.  This will depend on
#' which machine learning models were run when creating the output object.   If 
#' unsure the user can run the function without specifying wbeta and a legend 
#' will be directed to the console.  
#' @param df The degrees of freedom for the spline function, default=5. 
#' @param resample 1 to base the splines on the leave out X*Beta's ($xbetas.cv), or 0 to 
#' use the naive X*Beta's ($xbetas).
#' @param oob 1 (default) to construct calibration plots using the out of bag 
#' data points, 0 to use inbag (including resampled data points) data 
#' points. Only applies when bootstrap is used instead of k-fold cross 
#' validation. 
#' @param bootci Calculate bootstrap confidence ientervals for calibration 
#' curves. This is for exploration only. The applicability of bootstrap 
#' confidence intervals for these calibration curves is questionable. 
#' @param trim the percent of top and bottom of the data to be trimmed away when 
#' producing plots.
#' @param plot 1 by default to plot the calibration
#' @param lwd line width for plots, 1 by default
#' @param title title for inclusion on plots, default NULL 
#' @param xlim xlim for the plots.  This does not effect the curves within 
#' the plotted region.  Caution, for the "cox" framework the xlim are specified
#' in terms of the X*beta and not the HR, even when HR is described on the axes.
#' @param ylim ylim for the plots, which will usually only be specified in a 
#' second run of for the same data.  This does not effect the curves within 
#' the plotted region.  Caution, for the "cox" framework the ylim are specified
#' in terms of the X*beta and not the HR, even when HR is described on the axes.
#' @param xlab a user specified label for the x axis
#' @param ylab a user specified label for the y axis
#' @param col.term a number for the line depicting the overall calibration estimates
#' @param col.se a number for the line depicting the +/- 2 * standard error 
#' lines for the overall calibration estimates 
#' @param rug 1 to plot a rug for the model x*betas, 0 (default) to not.
#' @param plothr  a power > 1 determining the spacing of the values 
#' on the axes, e.g. 2, exp(1), sqrt(10) or 10.  The default of 0 plots the 
#' X*Beta.  This only applies for "cox" survival data models.   
#' @param repn The replication or repeat number for (nested) CV or bootstrap.
#' @param knottype 1 (default) to use XBeta used for the spline fit to 
#' choose ns() knots for gaussian and binomial families, 2 to use the XBeta
#' from all re-samples to determine the knots.  
#' @param ... allowance to pass terms to the invoked plot function
#'
#' @return Plots are returned by default, and optionally data for plots are 
#' output to a list.
#' 
#' @importFrom stats sd quantile 
#' @importFrom utils str 
#' @importFrom graphics box rug title
#' @importFrom survival coxph pspline
#' @importFrom splines ns
#' 
#' @author Walter Kremers (kremers.walter@mayo.edu)
#' 
#' @noRd

calplot0 = function(object1, wbeta=NULL, df=5, resample=1, oob=1, bootci=0, repn=NULL, knottype=1,
                    trim=0, plot=1, lwd=1, title=NULL, xlim=NULL, ylim=NULL, xlab=NULL, ylab=NULL, 
                    col.term=1, col.se=2, rug=1, plothr=0, ... ) {
#  print(c(100, oob ))  
  ## resample replaces cv 
  ## repn replaces fold 
  #  object1 = object ; wbeta = 5 ; df = 3 ; resample = 1 ; trim = 0 ; repn = 1 ; plot = 1 ; lwd = 1 ; title=NULL ; xlim=NULL ; ylim=NULL ; xlab=NULL ; ylab=NULL ; col.term=1 ; col.se=2 ; rug=1 ; plothr=0 ; knottype=0 ; 
  #  plot=plotfold_ ; title=paste("repn",i_) ; 
  
#  object1 = object ; repn=NULL ; knottype=1 ; lwd=2 ;                           ## naive calibration plot test 
  
  family = object1$sample[1] 
  bootstrap =  as.numeric( object1$tuning[8] ) 
  if (is.na(bootstrap)) { bootstrap = 0 }
  folds_n = as.numeric( object1$tuning[1] )
  
  if (!(knottype %in% c(-1,0,1,2,3))) { knottype = 1 } 
#  if ( bootstrap == 0 ) { bootci = 0 }
#  if ( bootstrap == 1) {
#    if ( (bootci == 1) & (oob == 1) ) { cat("  For (bootci == 1) & (oob == 1), oob is set to 0") ; oob = 0 ; }
#  }
  
#  print(c(resample,bootstrap,oob,folds_n)) 
  
  ## check that resampling was done in original analysis 
  if (resample == 1) {
    if (bootstrap == 0) {
      if ( is.null(object1$xbetas.cv) ) {
        resample = 0 
        cat(paste("  xbeta.cv is NULL so resample set to 0\n"))
      }
    }
    if (bootstrap >= 1) {
      if (is.null(object1$xbetas.boot.oob) ) {
        resample = 0 
        cat(paste("  xbeta.boot is NULL so resample set to 0\n"))
      }
    }
  }
  
  stop_ = 0 
  
  ##### get range of full model data ###########################################
  minxb = min( object1$xbetas[,wbeta ] )                                        # object1$xbetas[1:10 , wbeta, drop=FALSE ] # xb.hat.p[1:10]
  maxxb = max( object1$xbetas[,wbeta ] )
  fully_ = object1$y_                                                           # length(fully_) # fully_[1:20] # yt[1:20] # (fully_[1:20]<0)
  
#    print( data.frame(cbind( varxb, num=1:length(varxb))) )
  
##---------------------
  
#  print(c(resample,bootstrap,oob)) 
  if (resample == 1) { 
    if (bootstrap == 0) {
      origxb = object1$xbetas.cv[,wbeta ]  
      xbname = colnames(object1$xbetas.cv)[wbeta] 
    } else if (bootstrap >= 1) {
      if (oob == 1) { 
        origxb = object1$xbetas.boot.oob[,wbeta+2 ]  
        xbname = colnames(object1$xbetas.boot.oob)[wbeta+2] 
      } else {
        origxb = object1$xbetas.boot.inb[,wbeta+2 ]  
        xbname = colnames(object1$xbetas.boot.inb)[wbeta+2] 
#        print("Here 241010 1")
      }
    }
  } else { ## if (resample == 0)
    origxb = object1$xbetas[,wbeta]                                             ## cbind(origxb , xb.hat.p )[1:20,]
    xbname = colnames(object1$xbetas)[wbeta]  
  }
  
  ##---------------------

  ## select subset for single fold or bootstrap sample, if applicable ----------
  if ( (resample == 1) & (!is.null(repn)) ) {
    if (bootstrap == 0) {
      foldid = object1$foldid 
      table( foldid ) 
      xb_ = origxb[ (foldid == repn) ] 
      y_  = fully_[ (foldid == repn) ] 
    } else if (bootstrap >= 1) {
      if (oob == 1) { 
        xb_ = origxb[ object1$xbetas.boot.oob[,1]==repn ]                       ## check this 
        oob_tmp = object1$xbetas.boot.oob[object1$xbetas.boot.oob[,1]==repn,2]  ## out of bag sequence IDs 
        y_  = fully_[ oob_tmp ]                                                 ## get correpsoing y_'s 
      } else { 
        xb_ = origxb[ object1$xbetas.boot.inb[,1]==repn ]                       ## check this 
        inb_tmp = object1$xbetas.boot.inb[object1$xbetas.boot.inb[,1]==repn,2]  ## out of bag sequence IDs 
        y_  = fully_[ inb_tmp ]                                                 ## get corresponding y_'s 
      } 
    }
  }
  ##---------------------
  
  ## data for all rows -- really only makes sense for cross validation ---------
  ## and only for inspection, not intended for inference -----------------------
  if ( (resample == 1) & (is.null(repn)) ) {
    if (bootstrap == 0) {
      y_  = fully_
      xb_ = origxb
    } 
    if (bootstrap >= 1) {                              
      xb_ = origxb 
      for ( i_ in c(1:object1$tuning[8])) {
        oob_tmp = object1$xbetas.boot.oob[object1$xbetas.boot.oob[,1]==i_,2]
        y_tmp  = fully_[oob_tmp]
        xb_tmp = origxb[oob_tmp]
        if (i_ == 1) { 
          resampno = rep(1,length(y_tmp))
          y_ = y_tmp
          xb_ = xb_tmp
        } else if (i_ != 1) { 
          resampno = rep(i_,length(y_tmp))
          y_  = c(y_ ,y_tmp) 
          xb_ = c(xb_,xb_tmp) 
        } 
      }
    }
  }
  ##---------------------
 
  ##----- calibrate naively without resampling, i.e. using model based upon all data ---------------------
  ##----- uses folds but more for inspection than inference ---------
  ##                                                                            ## check, avoid for bootstrap >=1, resample = 0 and !is.null(repn)
  if ( (resample == 0) & (!is.null(repn)) ) {
    foldid = object1$foldid 
    y_  = fully_[ (foldid == repn) ]  
    xb_ = origxb[ (foldid == repn) ] 
  }
  ##---------------------
  
  ##---------------------
  if ( (resample == 0) & (is.null(repn)) ) {
    y_  = fully_                                                                # length(y_) # y_[1:20] # yt[1:20]
    xb_ = origxb                                                                ## cbind(xb_ , xb.hat.p )[1:20,]
#    print(xb_[1:10])
  }
  ##---------------------
  
  c( length( y_ ) , length( xb_ ) ) 
  
  ## get range of actual data for plotting individual spline fits 
  minxb_ = min(xb_) 
  maxxb_ = max(xb_) 

#  print(c(bootci,resample,repn))

  ## reorder data in terms of xb_ 
  ordr = order(xb_)                                                             ## table(table(ordr)) ; summary(xb_) ; summary(y_) ; 
  xb_ = xb_[ordr] 
  y_  = y_ [ordr]                                                               ## summary(xb_) ; summary(y_) ; 
#  cat(paste("3 var xb_ = ", var(xb_), "\n"))
  
  if (family == "cox") {
    #   length(table(table(xb_))) 
    lxb_ = length(unique(xb_))
    if ( lxb_ < (df+1))  { df = lxb_ - 1 }
    if (df > 1) {
      object2 = coxph(y_ ~ pspline(xb_,df=df), data=as.data.frame(cbind(y_, xb_)) )                         # termplot(object2,1) ; abline(a=0,b=1) ; title("calplot")
      #      object3 = coxph(Surv(yt,event) ~ pspline(xb_,df=df), data=as.data.frame(cbind(yt, event, xb.hat.p)) ) # termplot(object3,1) ; abline(a=0,b=1) ; title("predict()")
    } else {
      object2 = coxph(y_ ~ xb_, data=as.data.frame(cbind(y_, xb_)) ) 
    }
  } else {
    if (var(xb_)>0) { 
      #        cat( paste( "knottype = " , knottype, "\n" ) )   
      if        (knottype == 1) { myspline = ns(xb_, df=df, intercept=FALSE)  
      } else if (knottype == 2) { myspline = ns(origxb, df=df, intercept=FALSE)
      } else if (knottype == 3) { myspline = ns(object1$xbetas[,wbeta ], df=df, intercept=FALSE) 
      } else if (knottype <= 0) { myspline = ns(xb_, df=df, intercept=FALSE) } 
      if (knottype<=0) { 
        print("Outside spline") 
        print(attr(myspline,c("degree")))
        print(attr(myspline,c("knots")))
        print(attr(myspline,c("Boundary.knots")))
        print(attr(myspline,c("intercept")))
      }
      if (knottype %in% c(1,2,3)) { 
        Knots = attr(myspline,"knots")
        Boundary.Knots = attr(myspline,"Boundary.knots")
        object2 = glm(y_ ~ ns(xb_,knots=Knots, Boundary.knots=Boundary.Knots) , family=family ) # data=as.data.frame(cbind(y_, xb))  
      } 
      if (knottype <= 0) { 
        object2 = glm(y_ ~ ns(xb_,df=df) , family=family ) # data=as.data.frame(cbind(y_, xb))  
        if (knottype==-1) { str(object2$model) }
      } 
    } else {
      object2 = glm(y_ ~ 1 , family=family ) # data=as.data.frame(cbind(y_, xb))  
    }
    summary(object2)
  }
  
  if ( (resample == 1) & (bootstrap >= 1) & (family == "cox") ) {
    xbetas = object1$xbetas[,wbeta]                                        

    mean_xb_full = mean( xbetas ) 
    mean_xb_sub  = mean( xb_    ) 

    mean_xb_full_middle = mean( xbetas[ ( (xbetas > quantile(xbetas,0.30)) & (xbetas < quantile(xbetas,1-0.3)) ) ] )   
    if        (oob == 1) { sub_ids = object1$xbetas.boot.oob[object1$xbetas.boot.oob[,1]==repn,2]     ## out of bag sequence IDs 
    } else if (oob == 0) { sub_ids = object1$xbetas.boot.inb[object1$xbetas.boot.inb[,1]==repn,2] }   ## in bag sequence IDs 
    xb_full_sub = xbetas[ sub_ids ]
    mean_xb_sub_middle  = mean( xb_full_sub[ ( (xb_full_sub > quantile(xb_full_sub,0.30)) & (xb_full_sub < quantile(xb_full_sub,1-0.3)) ) ] ) 

    ADJ = ( mean_xb_full - mean_xb_sub) + ( mean_xb_full_middle - mean_xb_sub_middle )
    ADJA = ( mean_xb_full - mean_xb_sub) 
#    cat(paste("  repn = ", repn, "  ADJ = ", ADJ, "  ADJA = ", ADJA, "  mean_xb_full = ", mean_xb_full, "\n"))
#    ADJA = ( mean_xb_sub) ## bad 
#    ADJA = ( mean_xb_full ) ##  
#    ADJA = 0
    ADJ = ADJA 
  } else { ADJ = 0 } 

  
  plotxb = minxb + rep(0:100)*(maxxb-minxb)/100
  
  ## mean(predict(object2)) ## 9e-16
  meancalxb = mean(predict(object2, data.frame(xb_=origxb - ADJ)))               ## of any use? 
  #      cat( paste(title, meancalxb, "\n" ) )  ## 9e-16
  meanxb_ = mean(xb_)
#  dfr = data.frame(xb_=plotxb)                   ## title="derived range" 
#  dfr = data.frame(xb_=xb_)                     ## title="original data"
#  est = predict(object2, newdata=dfr)   
  predicteds = predict(object2, newdata=data.frame(xb_=plotxb-ADJ), se=TRUE)  
  est = predicteds$fit
  se = predicteds$se.fit
  estci = cbind(plotxb, est, se, est - 2 * se, est + 2 * se)
#  estci = cbind(plotxb, est, se, est - qnorm(0.975) * se, est + qnorm(0.975) * se)
  
  lty1 = 1 ; lty2 = 2 ;   
  if (is.null(ylab)) {
    if (family %in% c("cox") ) {
      if (plothr > 1) {
        ylab = "Calibrated HR"
      } else {
        ylab = "Calibrated log(HR)"
      }
    } else if (family %in% c("binomial","gaussian") ) {
      ylab = "Calibrated X*Beta"
    } 
  }
  if (is.null(xlab)) { xlab = myxlab(family, plothr, resample, oob, xbname, bootstrap, bootci) }
  xlims = quantile(xb_, (c(trim,(100-trim))/100))
  #    print(xlims)
  summary(estci[,1])
  #    estci_ = estci[((estci[,1]>=xlims[1]) & (estci[,1]<= xlims[2])),]
  #    summary(estci_[,1])
  if (is.null(xlim)) { xlim = c(minxb, maxxb) }
  if (is.null(ylim)) { ylim = c(min(estci[,2]), max(estci[,2])) }
  if (plot %in% c(1,2)) {
    if ((minxb < minxb_) | (maxxb > maxxb_)) { 
      indxlo = c(1:length(estci[,1])) [ estci[,1] < minxb_ ] 
      indxhi = c(1:length(estci[,1])) [ estci[,1] > maxxb_ ] 
      if ( length( c(indxlo,indxhi) ) > 0) {
        indxfold = c(1:length(estci[,1])) [-c(indxlo,indxhi) ] 
      } else {
        indxfold = c(1:length(estci[,1]))
      }
      plot(  estci[indxlo,1], estci[indxlo,2] , type="l", 
             lty=lty2, col=col.term, lwd=lwd, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, axes=0 , ... ) 
      lines( estci[indxlo,1], estci[indxlo,4], lty=lty2, lwd=lwd, col=col.se ) 
      lines( estci[indxlo,1], estci[indxlo,5], lty=lty2, lwd=lwd, col=col.se ) 
      lines( estci[indxfold,1], estci[indxfold,2], lty=lty1, lwd=lwd, col=col.term ) 
      lines( estci[indxfold,1], estci[indxfold,4], lty=lty1, lwd=lwd, col=col.se ) 
      lines( estci[indxfold,1], estci[indxfold,5], lty=lty1, lwd=lwd, col=col.se ) 
      lines( estci[indxhi,1], estci[indxhi,2], lty=lty2, lwd=lwd, col=col.term ) 
      lines( estci[indxhi,1], estci[indxhi,4], lty=lty2, lwd=lwd, col=col.se ) 
      lines( estci[indxhi,1], estci[indxhi,5], lty=lty2, lwd=lwd, col=col.se ) 
    } else {
      plot(  estci[,1], estci[,2] , type="l", col=col.term, lwd=lwd, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, axes=0 , ... ) 
      lines( estci[,1], estci[,4] , lty=lty1, lwd=lwd, col=col.se ) 
      lines( estci[,1], estci[,5] , lty=lty1, lwd=lwd, col=col.se )
    }
    
    if (family == "cox") {
      abline(a=-meanxb_, b=1, lty=3)
      abline(h=0, lty=1)
      myaxis(ylim, plothr, est , side=2)
      myaxis(xlim, plothr, plotxb, side=1)
    } else { 
      abline(a=0, b=1, lty=3)
      axis(side=2)
      axis(side=1)
    }
    box() 
    
    keep_  = ((xb_ >= xlim[1]) & (xb_ <= xlim[2]))
    if (sum(keep_==0) > 0) {
      cat(paste("  Due to user specified xlim ", sum(keep_==0) , "tick marks are not displayed repn (replicaiton number) ", repn, ")\n"))
    }
#    print(length(xb_))
    xb_ = xb_[ keep_ ]
    y_  = y_ [ keep_ ]
#    print(length(xb_))
    if (rug == 1) { myrug(xb_, y_, family) } 
    if (!is.null(title)) { title ( title ) }
    #        termplot(object2, terms=1,se=TRUE, xlab=xlab, ylab="Calibrated log(HR)", lwd.term=lwd, lwd.se=lwd )
  }
  if (plot %in% c(0,2)) {
    return( list( estci=estci, meancalxb=meancalxb, meanxb_=meanxb_, minmax=c(minxb_,maxxb_), xb_=xb_, y_=y_ ) )
  }
}

#
################################################################################
#
#' Construct calibration plots for a nested.glmnetr output object 
#' 
#' Using k-fold cross validation this function constructs calibration plots for 
#' a nested.glmnetr output object. Each hold out subset of the k-fold cross 
#' validation is regressed on the x*beta predicteds based upon the 
#' model fit using the non-hold out data using splines.  This yields k spline 
#' functions for evaluating model performance.  These k spline functions are 
#' averaged to provide an overall model calibration.  Standard deviations of 
#' the k spline fits are also calculated as a function of the predicted X*beta, 
#' and these are used to derive and plot approximate 95% confidence intervals 
#' (mean +/- 2 * SD/sqrt(k)). Note, standard errors calculated in this manner 
#' may underestimate (or overestimate?) the true standard error, the 
#' displayed confidence intervals might be too narrow for a 95% coverage 
#' probability and should be interpreted with caution. See the package vignettes 
#' for discussion and references. Further, because regression equations can be 
#' unreliable when extrapolating beyond the data range used in model derivation, 
#' we display this overall calibration fit and CIs with solid lines only for 
#' the region which lies within the ranges of the predicted x*betas for  
#' all the k leave out sets.  The spline fits are made using the same framework 
#' as in the original machine learning model fits, i.e. one of "cox", "binomial" 
#' or "gaussian"family. For the "cox" framework the pspline() funciton is used, 
#' and for the "binomial" and "gaussian" frameworks the ns() function is 
#' used.  Predicted X*betas beyond the range of any of the hold 
#' out sets are displayed by dashed lines to reflect the lessor certainty when 
#' extrapolating even for a single hold out set.   
#' 
#' Optionally, for comparison,  
#' the program can fit a spline based upon the predicted x*betas ignoring the 
#' cross validation structure, or one can fit a spline using the x*betas 
#' calculated using the model based upon all data. 
#'
#' @param object A nested.glmnetr() output object for calibration
#' @param wbeta  Which Beta should be plotted, an integer.  This will depend on
#' which machine learning models were run when creating the output object.   If 
#' unsure the user can run the function without specifying wbeta and a legend 
#' will be directed to the console.  
#' @param df The degrees of freedom for the spline function 
#' @param resample 1 to base the splines on the leave out X*Beta's ($xbetas.cv 
#' or $xbetas.boot.oob), or 0 to use the naive X*Beta's ($xbetas).  This can be done 
#' to see biases associated with the naive approach. 
#' @param oob 1 (default) to construct calibration plots using the out-of-bag 
#' data points, 0 to use in bag (including resampled data points) data 
#' points. This option only applies when bootstrap is used instead of k-fold 
#' cross validation, and when resample is set to 1. For cross validation 
#' evaluations out-of-bag samples (folds) are always used for evaluation. The 
#' purpose of oob = 0 is to allow evaluation of the variability of 
#' bootstrap calibrations ignoring bias like done in Riley et al., 2023,
#' doi: 10.1186/s12916-023-03212-y and Austin and Steyerberg 2013, 
#' doi: 10.1002/sim.5941
#' @param bootci 1 to calculate bootstrap confidence intervals for calibration 
#' curves adjusting for bias, 0 (default) to simply plot the calibration curves 
#' based upon the inbag data. This is for exploration only, and only when 
#' bootstrap samples were used for model performance evaluation. The 
#' applicability of bootstrap confidence intervals for these calibration curves 
#' is questionable. If bootci is set to 1 then oob is set to 0.   
#' @param plot 1 by default to produce plots, 0 to output data for plots only, 
#' 2 to plot and output data. 
#' @param plotfold 0 by default to not plot the individual fold calibrations, 1 
#' to overlay the k leave out spline calibration fits in a single figure and 2 
#' to produce separate plots for each of the k hold out calibration curves. 
#' @param plot_full plot full data 
#' @param plothr  a power > 1 determining the spacing of the values 
#' on the axes, e.g. 2, exp(1), sqrt(10) or 10.  The default of 0 plots the 
#' X*Beta.  This only applies fore "cox" survival data models. 
#' @param knottype 1 (default) to use XBeta used for the spline fit to 
#' choose knots in ns() for gaussian and binomial families, 2 to use the XBeta
#' from all re-samples to determine the knots. 
#' @param trim the percent of top and bottom of the data to be trimmed away when 
#' producing plots.  The original data are still used used calcualting the curves 
#' for plotting. 
#' @param vref Similar to trim but instead of trimming the spline lines, plots 
#' vertical refence lines aht the top vref and bottom vref percent of the model
#' X*Betas's
#' @param xlim xlim for the plots.  This does not effect the curves within 
#' the plotted region.  Caution, for the "cox" framework the xlim are specified
#' in terms of the X*beta and not the HR, even when HR is described on the axes.
#' @param ylim ylim for the plots, which will usually only be specified in a 
#' second run of for the same data.  This does not effect the curves within 
#' the plotted region.  Caution, for the "cox" framework the ylim are specified
#' in terms of the X*beta and not the HR, even when HR is described on the axes.
#' @param xlab a user specified label for the x axis
#' @param ylab a user specified label for the y axis
#' @param col.term a number for the line depicting the overall calibration estimates
#' @param col.se a number for the line depicting the +/- 2 * standard error 
#' lines for the overall calibration estimates 
#' @param rug 1 to plot a rug for the model x*betas, 0 (default) to not.
#' @param seed an integer seed used to random select the multiple of X*Betas
#' to be used in the rug when using bootstraping for model evaluation as sample 
#' elements may be included multiple times as test (Out Of Bag) data.
#' @param cv Deprecated. Use resample option instead.
#' @param fold Deprecated. This term is now ignored.
#' @param ... allowance to pass terms to the invoked plot function  
#'
#' @return Calibration plots are returned by default, and optionally data for plots 
#' are output to a list.
#' 
#' @seealso
#'   \code{\link{plot.nested.glmnetr}} , \code{\link{summary.nested.glmnetr}} , \code{\link{nested.glmnetr}} 
#'   
#' @author Walter Kremers (kremers.walter@mayo.edu)
#'
#' @export
#'
calplot = function(object, wbeta=NULL, df=3, resample=NULL, oob=1, bootci=0, 
                   plot=1, plotfold=0, plot_full=0, plothr=0, knottype=1, trim=0, vref=0, xlim=NULL, ylim=NULL, 
                   xlab=NULL, ylab=NULL, col.term=1, col.se=2, rug=1, seed=NULL, cv=NULL, fold=NULL, ... ) {
usefold = 1   
# print(c(101, oob ))  
#  cat( paste( " knottype 1 =", knottype, "\n")) 
# wbeta=5 ; df=3 ; resample=1 ; oob=1 ; bootci=0 ; trim=0 ; vref=0 ; plot=1 ; plotfold=0 ; xlim=NULL ; ylim=NULL ; xlab=NULL ; ylab=NULL ; col.term=1 ; col.se=2 ; rug=1 ; seed=NULL ; plothr=0 ; knottype=0 ; resample=1 ; fold=NULL ; cv=NULL ; 
# usefold=1 ; plot = 2 ; 
# resample=0 ; 

# object=nested.bin.boot.fit ; wbeta=16 ; bootci=0 ; oob=0 ; plotfold=1 ;

# object = nested.bin.fit3B 
# object = nested.bin.fit10Boot 
# object=nps_csf_glo_ratcap_fit
# object=nested_glmnetr_fit_hp_las
# object = nested.cox.fit 
# object1 = object ;
  
#  object1 = object ;  lwd=2 ;  trim = 0 ; usefold=NULL ;  
#  wbeta=5 ; resample=0  ; usefold=0 ; plotfold=0 ; ylim=c(-0.2,0.1) ; 
  
#  cat(paste("HERE1",ylim[1],ylim[2],"\n"))
  
  ## for random selection of subset of resamples for plotting 
  if (!is.null(seed)) { 
    seedr_ = seed
  } else { 
    x_ = object$seed$seedr  
    seedr_ = x_[length(x_)] 
  }
  
  set.seed( seedr_ ) 
  
##  if ( (!is.null(fold)) & (is.null(usefold)) ) { 
##    usefold = fold
##    cat(paste("  fold is deprecated and will be dropped from future versions. Use usefold instead\n", 
##              " as its meaning is clearer, though it also applies to bootstrap resamples." ))
##  } else if ( is.null(usefold) ) { usefold = 1 } 
  
  if ((!is.null(cv)) & (is.null(resample))) {
    resample = cv 
    cat(paste("  cv is deprecated and will be dropped from future versions. Use resample instead\n", 
                " as its meaning is clearer, as it also applies to bootstrap resamples." ))
  }
  if (is.null(resample)) { 
    resample = 1 
  } 
  
  #----------------------------------------
  
  if ( plothr == 1 ) { plothr = exp(1) }
  
  family = object$sample[1] 
  tuning = object$tuning 
  bootstrap = as.numeric( tuning[8] )
  if (is.na(bootstrap)) { bootstrap = 0 }
  unique    = as.numeric( tuning[9] )
  nobs = object$sample[2] 
  
  if ((bootstrap == 0) & (is.null(object$xbetas.cv))) {
    resample = 0 
    cat(paste("  xbeta.cv is NULL so resample set to 0\n"))
  } 
  if ((bootstrap >= 1) & (is.null(object$xbetas.boot.oob))) {
    resample = 0 
    cat(paste("  xbeta.boot.oob is NULL so resample set to 0\n"))
  } 
  
  if (!(knottype %in% c(-1,0,1,2,3))) { knottype = 1 } 
  if ( ( bootstrap == 0 ) & ( bootci == 1) ) { bootci = 0 ; cat("  bootci does not apply for cross validation evaluation so bootci is ignored") ; }
  if ((bootstrap >= 1) & (bootci == 1)) {
    if (resample != 1) { cat("  For (bootci == 1) & (resample == 0), resample is set to 1\n") ; resample = 1 ; }
    if (oob      != 0) { cat("  For (bootci == 1) & (oob == 1), oob is set to 0\n") ; oob = 0 ; }
    if (usefold  != 1) { usefold = 1 ; cat("  For (bootci == 1) & (usefold == 0), usefold is ignored\n") }
    if (knottype != 1) { knottype= 1 ; # cat("  For (bootci == 1) & (knottype == 0), usefold is ignored\n")
    }
  }
  
#  print(c(resample, bootstrap, usefold))
  
  if ( (resample == 0) & (usefold == 1) ) {
    usefold = 0 
#    cat(paste( "For (resample==0), usefold is set to 0\n"))
  }

  stop_ = 0 
  
  if (is.null(wbeta)) {
    cat(paste(" specify num for wbeta = \n"))
    if (resample == 1) {  
      if (bootstrap == 0) {
        colnames(object$xbetas.cv)[1] = "null"
        Var = diag(cov((object$xbetas.cv))) 
      } else if (bootstrap >= 1) {
#        colnames(object$xbetas.boot.oob)[c(1,2)] = c("rep","oobid") 
#        object$xbetas.boot.oob[,-1] 
#        cov(object$xbetas.boot.oob[,-1])
        Var = diag(cov(object$xbetas.boot.oob[,-c(1,2)])) 
      }
    } else { 
      colnames(object$xbetas)[1] = "null"
      Var = diag(cov((object$xbetas)))
    } 
    if (min(Var) > 0.01) { Var = round(Var,digits=2) 
    } else if (min(Var) > 0.001) { Var = round(Var,digits=3) 
    } else if (min(Var) > 0.0001) { Var = round(Var,digits=4) 
    } else if (min(Var) > 0.00001) { Var = round(Var,digits=5) 
    }
    print( data.frame(cbind( Var, num=1:length(Var))) )
    stop_ = 1 
  } 
  
  if (!(resample %in% c(-1,0,1))) { stop_ = 1 }                                 ## -1 ?? 
  
  if (resample == 0) { oob = 0 }
  
  if ( (stop_ == 0) & (usefold == 0) ) {
    estci = calplot0(object, wbeta=wbeta, df=df, resample=resample, oob=oob, repn=NULL, knottype=1, 
                     trim=trim, plot=plot, lwd=2, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, 
                     col.term=col.term, col.se=col.se, rug=rug, plothr=plothr , ... )$estci 
    stop_ = 2 
#    print( estci )
  }
  
  if ( (stop_ == 0) & (resample == 1) & (bootstrap >= 1) ) {
    estci_full = calplot0(object, wbeta=wbeta, df=df, resample=0, bootci=1, oob=oob, repn=NULL, knottype=knottype, 
                     trim=trim, plot=0, lwd=2, xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, 
                     col.term=col.term, col.se=col.se, rug=rug, plothr=plothr , ... )$estci  
#    print( estci_full[1:5,] )
  }
  
  if ( (stop_ == 0) & (usefold == 1) ) { 
    if (resample == 1) { 
      if (bootstrap == 0) {
        xbeta = object$xbetas.cv[,wbeta ]  
        xbname = colnames(object$xbetas.cv)[wbeta] 
      } else if ((bootstrap >= 1) & (oob == 1)) {
        xbeta = object$xbetas.boot.oob[,wbeta+2 ]  
        xbname = colnames(object$xbetas.boot.oob)[wbeta+2] 
      } else if (bootstrap >= 1) {
        xbeta = object$xbetas.boot.inb[,wbeta+2 ]  
        xbname = colnames(object$xbetas.boot.inb)[wbeta+2] 
      }
    } else {
      xbeta = object$xbetas[,wbeta]  
      xbname = colnames(object$xbetas)[wbeta] 
    } 
#    print(xbeta[1:10])
    y_ = object$y_ ; 
    
    if (var(xbeta) == 0) {
      warning("  There may be no varaibalbity in the model X*Beta's\n",
              "    The calibration plot may be nonsensical")
    }

    minxb = min(object$xbetas[,wbeta ]) 
    maxxb = max(object$xbetas[,wbeta ]) 
    if (bootci == 1) {
#      minxb = min( object$xbetas[,wbeta+2 ], object$xbetas.boot.inb[,wbeta+2 ] )
#      maxxb = max( object$xbetas[,wbeta+2 ], object$xbetas.boot.inb[,wbeta+2 ] )
    }
    if (is.null(xlim)) {  xlim = c( minxb, maxxb )  }
    
    if (is.null(ylab)) {
      if (family == "cox") {
        if (plothr > 1) { ylab = "Calibrated HR"
        } else { ylab = "Calibrated log(HR)" } 
      } else {
        ylab = "Calibrated X*Beta"
      }
    }
    
    if (is.null(xlab)) { xlab = myxlab(family, plothr, resample, oob, xbname, bootstrap, bootci) }
    
    folds_n = as.numeric( object$tuning[1] )
    if (!(plotfold %in% c(0,1,2))) { plotfold = 0 }                             ## save estimates
    plotfold_ = plotfold 
    if (plotfold == 2) { plotfold_ = 2 }                                        ## make individual plots
    if (plotfold == 1) { plotfold_ = 0 }                                        ## make single plot with folds overlaid 
    
    if (bootstrap == 0) { reps = folds_n
    } else if (bootstrap >= 1) { reps = bootstrap }
    
    if (usefold == 1) {
      est.resample = matrix(0, nrow=reps, ncol=101)
      se.resample = est.resample
      lower.resample = est.resample
      upper.resample = est.resample
      meancalxb  = rep(0,folds_n)
      meanxb_x = rep(0,folds_n)
      minplot = min(minxb)
      maxplot = max(maxxb)
      
      for (i_ in c(1:reps)) { 
        calplotout = calplot0(object, wbeta=wbeta, df=df, resample=resample, bootci=bootci, oob=oob, repn=i_, knottype=knottype, 
                              trim=trim, plot=plotfold_, lwd=2, title=paste("fold",i_), xlim=xlim, ylim=ylim, xlab=xlab, ylab=ylab, 
                              col.term=col.term, col.se=col.se, rug=rug, plothr=plothr , ... )
        
        ##      object1=object ; repn=i_ ; plot=1 ; lwd=2 ; title=paste("fold",i_) ;
        ## get back the xbeta from the whole data set -- no predicteds used
        if (i_ == 1) { plotxb = calplotout$estci[,1] } 
        est.resample[i_,]  = calplotout$estci[,2]
        se.resample[i_,]   = calplotout$estci[,3]
        lower.resample[i_,] = calplotout$estci[,4]
        upper.resample[i_,] = calplotout$estci[,5]
        
        meancalxb[i_] = calplotout$meancalxb 
        meanxb_x[i_] = calplotout$meanxb_ 
        minplot = max( minplot, calplotout$minmax[1] ) 
        maxplot = min( maxplot, calplotout$minmax[2] ) 
        if (i_ == 1) {
          xb_x = calplotout$xb_ ;
          y_x = calplotout$y_
        } else {
          xb_x = c(xb_x, calplotout$xb_) ;
          y_x  = c(y_x , calplotout$y_ )
        }
      }
      
      #      cat( paste( "  Range of X*Beta for calibration: c( ", floor(minxb), " , ", ceiling(maxxb), " ) \n"))
      cat(paste( "  Range of X*Beta for calibration: \n"))
      cat(c(minxb,maxxb),"\n")
      cat(paste( "  Range of calibrated confidence intervals: \n")) 
      if (bootci == 1) {
      #        cat( c( min(estci_full[,4]), max(estci_full[,5]) ) ,"\n") 
      } else {
        cat( c( min(est.resample), max(est.resample) ) ,"\n") 
      }  
      #      sample(length(xb_x), nobs )
      #      plotxb.resample = calplotout$estci[,1]
      c(minplot, maxplot)
      ## get average and SE from the folds_n folds 
      
      if (bootci == 1) { 
        #        print( estci_full[1:20,1:5] )
        #        print( plotxb )
        est.resample = t( 2 * estci_full[,2] - t(est.resample) )                ## t for transpose 
      }    ## check row or column subtraction 
      
      est = colMeans(est.resample)
      
      if (bootstrap == 0) {
        se = sqrt(apply(est.resample,2,var)/folds_n)    
      } else if (bootstrap >= 1) {
        se = sqrt(apply(est.resample,2,var))    
      }
      upper = est + 2 * se 
      lower = est - 2 * se 
      
      if (plot %in% c(1,2)) {
        lty1 = 1 ; lty2 = 2 ; 
        
        xlims = quantile(xbeta, (c(trim,(100-trim))/100))
        keep_ = ( ((plotxb >= xlims[1]) & (plotxb <= xlims[2])) & ((plotxb >= xlim[1]) & (plotxb <= xlim[2])) ) 
        if ( trim > 0) { cat("  Spline curves are trimed at x's of", xlims[1], "and", xlims[2], ", or", trim, "% hi and lo\n") } 
        
        plotxb_ = plotxb[keep_]
        est_ = est[keep_]
        lower_ = lower[keep_]
        upper_ = upper[keep_]
        
        if (vref > 0) { vrefs = quantile(xbeta, (c(vref,(100-vref))/100)) }
        
        if (plotfold == 1) { lwd = 3 } else { lwd = 2 }
        
        #          plot(plotxb_, est_, type="l", lty=lty1, col=col.term, lwd=lwd, xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, axes=0, ... )
        #          lines( plotxb_, lower_, lty=lty1, col=col.se, lwd=lwd) 
        #          lines( plotxb_, upper_, lty=lty1, col=col.se, lwd=lwd) 
        
        indxlo = c(1:length(plotxb_)) [ plotxb_ < minplot ] 
        indxhi = c(1:length(plotxb_)) [ plotxb_ > maxplot ] 
        if ( length( c(indxlo,indxhi) ) > 0) {
          indxfold = c(1:length(plotxb_)) [-c(indxlo,indxhi) ] 
        } else {
          indxfold = c(1:length(plotxb_))
        }
        
        xlim0 = c(min(plotxb_), max(plotxb_)) 
        if (!is.null(xlim)) { xlim0 = xlim }
        if (is.null(ylim)) {
          ylim = c(min(est_), max(est_))
          if ( length(indxfold) > 0) { 
            if ( min(lower_[indxfold] ) < ylim[1] ) { ylim[1] = min(lower_[indxfold] ) }
            if ( max(upper_[indxfold] ) > ylim[2] ) { ylim[2] = max(upper_[indxfold] ) }
          }
        }
        
        plot(  plotxb_[indxlo], est_[indxlo] , type="l", 
               lty=lty2, col=col.term, lwd=lwd, xlab=xlab, ylab=ylab, xlim=xlim0, ylim=ylim, axes=0 , ... )
        lines( plotxb_[indxlo]  , lower_[indxlo  ], lty=lty2, lwd=lwd, col=col.se ) 
        lines( plotxb_[indxlo]  , upper_[indxlo  ], lty=lty2, lwd=lwd, col=col.se ) 
        lines( plotxb_[indxfold], est_  [indxfold], lty=lty1, lwd=lwd, col=col.term ) 
        lines( plotxb_[indxfold], lower_[indxfold], lty=lty1, lwd=lwd, col=col.se ) 
        lines( plotxb_[indxfold], upper_[indxfold], lty=lty1, lwd=lwd, col=col.se ) 
        lines( plotxb_[indxhi]  , est_  [indxhi  ], lty=lty2, lwd=lwd, col=col.term ) 
        lines( plotxb_[indxhi]  , lower_[indxhi  ], lty=lty2, lwd=lwd, col=col.se ) 
        lines( plotxb_[indxhi]  , upper_[indxhi  ], lty=lty2, lwd=lwd, col=col.se ) 
        
        if (family == "cox") { 
#          cat(paste(" mean xbeta = ", mean(object$xbetas[,wbeta]), "  ", mean(xbeta) , "  wbeta = " , wbeta, " \n"  ))
          if (bootstrap >= 1) { abline(a=-mean(object$xbetas[,wbeta]), b=1, lty=3) 
          } else { abline(a=-mean(xbeta), b=1, lty=3) }
          abline(h=0, lty=1)
          myaxis(ylim, plothr, est)
          myaxis(xlim, plothr, NULL, side=1)
        } else { 
          abline(a=0, b=1, lty=3)
          axis(side=1)
          axis(side=2)
        }
        box()
        
        if( bootstrap >= 1) {
          xbeta_rug = object$xbetas[,wbeta]  
        } else {
          xbeta_rug = xbeta  
        }
        
        keep_rug  = ((xbeta_rug >= xlim[1]) & (xbeta_rug <= xlim[2]))
        if (sum(keep_rug==0) > 0) {
          cat(paste("  Due to user specified xlim ", sum(keep_rug==0) , "tick marks are not displayed in rug\n"))
          cat(paste("  min:max xb = ", min(xbeta), max(xbeta) , "\n"))
        }
        
        xbeta_rug = xbeta_rug[ keep_rug ]
        y_rug     = y_[ keep_rug ]
        
        #        c( min(xbeta), min(xbeta_) ) 
        if (rug == 1) { myrug(xbeta_rug, y_rug, family) } 
        if (vref > 0) { abline(v=vrefs, lty=3, col=2) }
      }
      
      if (plot_full != 0) { plot_full = 1 } 
      if ((resample == 1) & (bootstrap >= 1) & (plot_full == 1)) {
        lines( plotxb, estci_full[,2], lty=3, col=1,  lwd=4) 
      }
      
      if (plotfold == 1) {
        for (i_ in c(1:reps)) {  
          lines( plotxb, est.resample[i_,], lty=i_, col=i_+1,  lwd=1) 
        }
      }
      
    }
  }

  if (stop_ %in% c(0,2)) {
    if (plot %in% c(0,2)) {
      if (usefold == 1) {  
        return_list = list(estimates=cbind(plotxb, est, se, lower, upper), 
                           est.resample=est.resample, se.resample=se.resample, lower.resample=lower.resample, upper.resample=upper.resample)  
      } else {
        return_list = list( estci = estci )  
      }
      return( return_list )
    }
  }
} 

#===============================================================================
# x_ = rnorm(100)^2
# y_ = rbinnom(100,1,(1/(1+exp(x_)))) 

#' A customized rug, for Cox events on top, censors on bottom 
#'
#' @param x_ the x variable to be included in the rug
#' @param y_ an event variable for placing the x_ values top for events and bottom
#' for non-events 
#' @param family one of "cox", "binomial" or "gaussian" 
#'
#' @return A rug to plot 
#'
#' @noRd

myrug = function(x_, y_=NULL, family="gaussian") {
  if (family == "cox") {
    if (dim(y_)[2]== 3) { e_ = y_[,3] 
    } else { e_ = y_[,2] }
#    ticksize = (0.1*y_[,1]/max(y_[,1])) 
#    ticksize[ticksize < 0.001] = 0.001
#    print(c(length(ticksize), length(y_)))
#    print(summary(ticksize))
#    ticksize0 = ticksize[e_==0]
#    ticksize1 = ticksize[e_==1]
    ticksize0 = 0.03 
    ticksize1 = 0.03 
    rug(x_[e_==0], side=1, col=1, ticksize=ticksize0)
    rug(x_[e_==1], side=3, col=1, ticksize=ticksize1)
  } else if (family == "binomial") {
    e_ = y_ 
    rug(x_[e_==0], side=1, col=1,ticksize=0.03)
    rug(x_[e_==1], side=3, col=1,ticksize=0.03)
  } else {
    rug(x_, side=1, col=1, ticksize=0.03)
  }
}

#===============================================================================
#' Un-log, or not, the log(HR)'s for plotting
#'
#' @param zlim the y limits on the log(HR) scale A nested.glmnetr() output object for calibration
#' @param plothr  a power of determining the placement of y axis 
#' values, e.g. 2, sqrt(10) or 10.  The default of 0 plots the X*Beta.
#' @param est values of X*Beta estimates to be plotted which will determine the 
#' range to plot in case zlim is NULL. 
#' @param side 1 for horizontal axis, 2 for vertical 
#' 
#' @return An axis statement 
#'  
#' @noRd

myaxis = function(zlim,plothr,est=NULL,side=2, digits=2) {
  if (is.null(plothr)) { plothr = 0 }
  if ((is.null(zlim)) & (!is.null(est))) {
    zlim = c(min(est),max(est)) 
  } else if ((is.null(zlim)) & (is.null(est))) {
    plothr = 0
  }
  if (plothr > 1) {
    xbs = zlim
    hrs = exp(zlim) 
    logXhrs = log(hrs) / log(plothr)
    logXhrs_ = c(ceiling((logXhrs[1])): floor(logXhrs[2]))
    hrs_ = plothr^(logXhrs_)
    loghrs_ = log(hrs_)
    labls = round(hrs_,digits=digits) 
    myaxis = axis(side, at=loghrs_, labels = labls, las=1 )  
  } else {
    myaxis = axis(side)  
  }
  return(myaxis)
}

#===============================================================================
#' Set up default label for x-axis 
#'
#' @param family family
#' @param plothr  a power of determining the placement of y axis 
#' values, e.g. 2, sqrt(10) or 10.  The default of 0 plots the X*Beta.
#' @param resample resample 
#' @param xbname xbname
#' @param bootstrap bootstrap
#' @param bootci bootci
#' 
#' @return An axis label
#'  
#' @noRd

myxlab = function(family, plothr, resample, oob, xbname, bootstrap,bootci=0) {
  if (family %in% c("cox") ) {
    if (plothr > 1) {
      if (resample==1) { 
        if (bootstrap == 0) { 
          xlab = paste(xbname,"HR (Nested CV)") 
        } else if (bootstrap >= 1) {
          if (bootci == 1) { xlab = paste(xbname,"HR (Bootstrap In Bag, bias adjusted)")
          } else if (oob == 1) { xlab = paste(xbname,"HR (Bootstrap Out Of Bag)") 
          } else {  xlab = paste(xbname,"HR (Bootstrap In Bag)") }
        }
      } else { 
        xlab = paste(xbname,"HR (naive)")
      }
    } else {
      if (resample==1) { 
        if (bootstrap == 0) { 
          xlab = paste(xbname,"log(HR) (Nested CV)") 
        } else if (bootstrap >= 1) {
          if (bootci == 1) { xlab = paste(xbname,"HR (Bootstrap In Bag, bias adjusted)")
          } else if (oob == 1) { xlab = paste(xbname,"log(HR) (Bootstrap Out Of Bag)") 
          } else {  xlab = paste(xbname,"log(HR) (Bootstrap In Bag)") }
        }
      } else { 
        xlab = paste(xbname,"log(HR) (naive)")
      }
    }
  } else if (family %in% c("binomial","gaussian") ) {
    if (resample==1) { 
      if (bootstrap == 0) { 
        xlab = paste(xbname,"X*Beta (Nested CV)") 
      } else if (bootstrap >= 1) { 
        if (bootci == 1) { xlab = paste(xbname,"X*Beta (Bootstrap In Bag, bias adjusted)")
        } else if (oob == 1) { xlab = paste(xbname,"X*Beta (Bootstrap Out Of Bag)") 
        } else { xlab = paste(xbname,"X*Beta (Bootstrap In Bag)") }
      }
    } else { xlab = paste(xbname,"X*Beta (naive)") }
  }
  return(xlab)
}

################################################################################
################################################################################
Any scripts or data that you put into this service are public.
glmnetr documentation built on April 3, 2025, 6:45 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
glmnetr
Nested Cross Validation for the Relaxed Lasso and Other Machine Learning Models

R/calplot_241224.R
In glmnetr: Nested Cross Validation for the Relaxed Lasso and Other Machine Learning Models

Defines functions myxlab myaxis myrug calplot calplot0

Documented in calplot

Try the glmnetr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

glmnetr Nested Cross Validation for the Relaxed Lasso and Other Machine Learning Models

R/calplot_241224.R In glmnetr: Nested Cross Validation for the Relaxed Lasso and Other Machine Learning Models

Defines functions myxlab myaxis myrug calplot calplot0

Documented in calplot

Try the glmnetr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

glmnetr
Nested Cross Validation for the Relaxed Lasso and Other Machine Learning Models

R/calplot_241224.R
In glmnetr: Nested Cross Validation for the Relaxed Lasso and Other Machine Learning Models