R/plot.OR.R
In flexOR: Flexible Odds Ratio Curves

Documented in plot.OR

#' @title plot.OR: Plot Smooth Odds Ratios
#' @description
#' Plots smooth odds ratios along with confidence intervals for a specified predictor.
#'
#' @aliases plot.OR
#'
#' @param x An object of class "OR" generated by the \code{\link{flexOR}} function.
#' @param predictor The name of the predictor variable for which to plot the smooth odds ratios.
#' @param prob The probability level for the confidence interval. Default is NULL.
#' @param ref.value The predicted value at which to calculate the smooth odds ratios. Default is NULL.
#' @param conf.level The confidence level for the intervals. Default is 0.95.
#' @param round.x The number of decimal places to round the predictor variable values. Default is NULL.
#' @param ref.label The label for the reference value of the predictor variable. Default is NULL.
#' @param col Vector of colors for plotting. Default is c("black", "black", "grey85").
#' @param col.area Vector of colors for the confidence intervals.
#' @param main The title of the plot. Default is generated based on the predictor variable.
#' @param xlab Label for the x-axis. Default is the name of the predictor variable.
#' @param ylab Label for the y-axis. Default is "Ln OR(Z,Zref)" if logarithmic scale is used, else "OR(Z,Zref)".
#' @param lty Vector of line types for plotting. Default is c(1, 3).
#' @param xlim Range of the x-axis. Default is NULL.
#' @param ylim Range of the y-axis. Default is NULL.
#' @param xx Values for tick marks on the x-axis. Default is NULL.
#' @param ylog Logical. If TRUE, y-axis is on a logarithmic scale. Default is TRUE.
#' @param log Use a logarithmic scale for the y-axis (alternative argument name).
#' @param ... Additional arguments passed to plotting functions.
#'
#' @return
#' This function doesn't return a value. It is used for generating a plot.
#'
#' @examples
#' library(gam);
#'
#' # Load dataset
#' data(PimaIndiansDiabetes2, package="mlbench");
#'
#' mod1 <- flexOR(
#'   data=PimaIndiansDiabetes2,
#'   response="diabetes",
#'   formula=~s(age, 3.3) + s(mass, 4.1) + pedigree
#' );
#' 
#  plot(
# x = mod1,
# predictor = "mass",
# ref.value = 40,
# ref.label = "Ref. value",
# col.area = c("grey75", "grey90"),
# main = " ",
# xlab = "Body mass index",
# ylab = "Log Odds Ratio (Ln OR)",
# lty = c(1,2,2,3,3),
# round.x = 1,
# conf.level = c(0.8, 0.95)
# )
#'
#' @keywords hplot methods models nonlinear regression smooth
#' @importFrom stats update predict vcov quantile qnorm
#' @importFrom grDevices rgb
#' @import graphics
#' @export
plot.OR <- function(
    x, predictor, prob=NULL, ref.value=NULL, conf.level=0.95, round.x=NULL,
    ref.label=NULL, col, col.area, main, xlab, ylab, lty, xlim, ylim, xx, ylog=TRUE,
    log=ifelse(ylog, "", "y"), ...
) {
  object <- x;
  if ( !inherits(object, "OR") ) {stop("Object must be of class OR");}
  mydata <- object$dataset;
  fit <- object$gamfit;
  if ( missing(round.x) ) {round.x <- 5;}
  if ( !missing(ref.value) ) {prob <- 0.5;}
  if ( !missing(prob) ) {
    if (prob < 0 | prob > 1) {stop("The argument 'prob' must be between o and 1");}
  }
  if ( missing(prob) & missing(ref.value) ) {prob <- 0;}
  if ( !missing(ref.value) & !missing(xlim) ) {
    if ( ref.value < min(xlim) | ref.value > max(xlim) ) {
      stop("The reference value is out of range of 'xlim'");
    }
  }
  if ( missing(predictor) ) {stop("Missing predictor");}
  if ( missing(col) & length(conf.level) == 1) {col <- c(1, 1, 1);}
  if ( missing(col) & length(conf.level) == 2) {col <- c(1, 1, 1, 1, 1);}
  if ( missing(col.area) ) {
    col.area <- c( rgb(0.9, 0.9, 0.9, 0.5), rgb(0.7, 0.7, 0.7, 0.5) );
    if (length(conf.level)==1) {col.area <- col.area[1];}
  }
  if ( missing(ylab) ) {
    ylab <- if (ylog) {"Ln OR(Z, Zref)";} else {"OR(Z, Zref)";}
  }
  if ( missing(lty) & length(conf.level) == 1 ) {lty <- c(1, 2, 2);}
  if ( missing(lty) & length(conf.level) == 2 ) {lty <- c(1, 2, 2, 3, 3);}
  if ( is.list(fit$x) ) {fit <- update(fit, . ~ ., x=TRUE);}
  if ( !length(conf.level) %in% c(1, 2) ) {
    stop("'conf.level' length must be either 1 or 2");
  }
  if (length(conf.level) > 1) {conf.level <- sort(conf.level);}
  if ( any(conf.level <= 0.5, conf.level >= 1) ) {
    stop("'conf.level' must be greater than 0.5 and less than 1");
  }
  if ( length(col.area)!=length(conf.level) ) {
    stop("'col.area' and 'conf.level' must have the same length");
  }
  ctype <- "FALSE";
  qvalue <- (1+conf.level[1])/2;
  if (length(conf.level) > 1) {qvalue2 <- (1+conf.level[2])/2;}
  linear.predictor <- FALSE;
  k1 <- 9999;
  k <- which(names(mydata) == predictor);
  k <- c(k, k1);
  if (k[1] == 9999) {stop("predictor must be in data");}
  k <- k[1];
  a <- mydata;
  if ( missing(xlab) ) {xlab <- names(a)[k];}
  n.predictor <- names(a)[k];
  n <- dim(a)[1];
  if (prob == 0) {
    eta.no.ref <- predict(fit, type="terms");
    if ( inherits(eta.no.ref, "numeric") ) {
      kp <- 1;
      eta.no.ref <- cbind(eta.no.ref, eta.no.ref);
    }
    else {kp <- grep( predictor, colnames(eta.no.ref) );}
    eta.xref <- min(eta.no.ref[,kp]);
    ii <- which.min(eta.no.ref[,kp]);
    xref <- a[ii,k];
    eta.ref <- eta.no.ref[,kp]-eta.xref;
    indices <- grep(names(a)[k], dimnames(fit$x)[[2]]);
    submatriz.diseno <- fit$x[,indices];
    if ( !is.matrix(submatriz.diseno) ) {linear.predictor <- TRUE;}
    submatriz.var <- vcov(fit)[indices, indices];
    xref1 <- rep(fit$x[ii, indices], dim(fit$x)[1]);
    if (linear.predictor) {
      xref1 <- matrix(xref1, nrow=dim(fit$x)[1], ncol=1, byrow=TRUE);
    } else {
      xref1 <- matrix(
        xref1, nrow=dim(fit$x)[1], ncol=dim(submatriz.diseno)[2], byrow=TRUE
      );
    }
    eta.ref1 <- fit$x[,indices]-xref1;
    var.eta.ref1 <- rep(NA, n);
    for (i in 1:n) {
      var.eta.ref1[i] <- eta.ref1[i,]%*%vcov(fit)[indices, indices]%*%eta.ref1[i,];
    }
    se.eta.ref1 <- sqrt(var.eta.ref1);
  } else if (prob > 0 & prob < 1) {
    eta.no.ref <- predict(fit, type="terms");
    if ( inherits(eta.no.ref, "numeric") ) {
      kp <- 1;
      eta.no.ref <- cbind(eta.no.ref, eta.no.ref);
    }
    else {kp <- grep( predictor, colnames(eta.no.ref) );}
    ord <- order(a[,k]);
    if ( !missing(ref.value) ) {
      pp <- seq(0, 1, len=10000);
      app <- quantile(a[,k], pp);
      qq <- which(app <= ref.value);
      qq1 <- max(qq);
      prob <- qq1/10000;
    }
    ind.prob <- trunc(prob*n);
    xref <- a[,k][ord[ind.prob]];
    eta.xref <- eta.no.ref[,kp][ord[ind.prob]];
    eta.ref <- eta.no.ref[,kp]-eta.xref;
    indices <- grep(names(a)[k], dimnames(fit$x)[[2]]);
    submatriz.diseno <- fit$x[,indices];
    if ( !is.matrix(submatriz.diseno) ) {linear.predictor <- TRUE;}
    submatriz.var <- vcov(fit)[indices, indices];
    xref1 <- rep(fit$x[ord[ind.prob], indices], dim(fit$x)[1]);
    if (linear.predictor) {
      xref1 <- matrix(xref1, nrow=dim(fit$x)[1], ncol=1, byrow=TRUE);
    }	else {
      xref1 <- matrix(
        xref1, nrow=dim(fit$x)[1], ncol=dim(submatriz.diseno)[2], byrow=TRUE
      );
    }
    eta.ref1 <- fit$x[,indices]-xref1;
    var.eta.ref1 <- rep(NA, n);
    for (i in 1:n) {
      var.eta.ref1[i] <- eta.ref1[i,]%*%vcov(fit)[indices, indices]%*%eta.ref1[i,];
    }
    se.eta.ref1 <- sqrt(var.eta.ref1);
  } else if (prob == 1) {
    eta.no.ref <- predict(fit, type="terms");
    if ( inherits(eta.no.ref, "numeric") ) {
      kp <- 1;
      eta.no.ref <- cbind(eta.no.ref, eta.no.ref);
    }
    else {kp <- grep( predictor, colnames(eta.no.ref) );}
    eta.xref <- max(eta.no.ref[,kp]);
    ii <- which.max(eta.no.ref[,kp]);
    xref <- a[ii,k];
    eta.ref <- eta.no.ref[,kp]-eta.xref;
    indices <- grep(names(a)[k], dimnames(fit$x)[[2]]);
    submatriz.diseno <- fit$x[,indices];
    if ( !is.matrix(submatriz.diseno) ) {linear.predictor <- TRUE;}
    submatriz.var <- vcov(fit)[indices,indices];
    xref1 <- rep(fit$x[ii,indices], dim(fit$x)[1]);
    if (linear.predictor) {
      xref1 <- matrix(xref1, nrow=dim(fit$x)[1], ncol=1, byrow=TRUE);
    } else {
      xref1 <- matrix(
        xref1, nrow=dim(fit$x)[1], ncol=dim(submatriz.diseno)[2], byrow=TRUE
      );
    }
    eta.ref1 <- fit$x[,indices]-xref1;
    var.eta.ref1 <- rep(NA,n);
    for (i in 1:n) {
      var.eta.ref1[i] <- eta.ref1[i,]%*%vcov(fit)[indices,indices]%*%eta.ref1[i,];
    }
    se.eta.ref1 <- sqrt(var.eta.ref1);
  }
  if ( missing(main) ) {
    if (ylog) {main <- paste("Smooth log odds ratio for", names(a)[k]);}
    else {main <- paste("Smooth odds ratio for", names(a)[k]);}
  }
  
  tmat <- cbind(
    eta.ref, eta.ref-qnorm(qvalue)*se.eta.ref1, eta.ref+qnorm(qvalue)*se.eta.ref1
  );
  if (length(conf.level) > 1) {
    tmat <- cbind(
      tmat, eta.ref-qnorm(qvalue2)*se.eta.ref1, eta.ref+qnorm(qvalue2)*se.eta.ref1
    );
  }
  if (!ylog) {tmat <- exp(tmat);}
  
  line <- rep(0, n);
  jj <- match(sort(unique(a[,k])), a[,k]);
  if ( missing(xlim) ) {xlim <- c( min(a[,k]), max(a[,k]) );}
  else if ( missing(ylim) ) {
    index1 <- which( a[jj,k] >= min(xlim) & a[jj,k] <= max(xlim) );
    index <- jj[index1];
    ylim <- c( min(tmat[index,2]), max(tmat[index,3]) );
  }
  if ( missing(ylim) ) {ylim <- c( min(tmat[,2]), max(tmat[,3]) );}
  if ( xref < min(a[,k]) | xref > max(a[,k]) ) {
    stop("The reference value is out of range of x");
  }
  if ( xref < min(xlim) | xref > max(xlim) ) {
    stop("The reference value is out of range of 'xlim'");
  }
  
  #  matplot(
  #    x=a[jj,k], y=tmat[jj,], type="l", lty=c(1, 5, 5, 2), col=c(1, 2, 2, 1),
  #    xlab=xlab, ylab=ylab, xlim=xlim, ylim=ylim, log=log, xaxt="n", main=main, ...
  #  );
  
  if (length(conf.level) > 1) {
    tmat2 <- tmat;
    tmat2[,2:3] <- tmat[,4:5];
    tmat2[,4:5] <- tmat[,2:3];
    
    #plot
    matplot(
      x = a[jj,k],
      y = tmat[jj, ],
      type = "n",  # 'n' to not plot lines
      xaxt = "n",
      xlim = xlim, #a
      ylim = ylim, #a
      log = log, #a
      main = main, #a
      lty = lty, #a
      xlab = xlab,
      ylab = ylab
    );
    
    #shaded areas
    polygon(
      c(a[jj,k], rev(a[jj,k])),
      c(tmat[jj, 4], rev(tmat[jj, 5])),
      col = col.area[1],  
      border = NA
    );
    
    polygon(
      c(a[jj,k], rev(a[jj,k])),
      c(tmat[jj, 2], rev(tmat[jj, 3])),
      col = col.area[2],  
      border = NA
    );
    
    # Add lines to the plot 
    matlines(
      x = a[jj,k],
      y = tmat[jj, ],
      lty = lty,
      col = 1,
      type = "l"
    );
    
    # Definition of the x axes
    xxx <- round( seq(min(a[,k]), max(a[,k]),len=5) );
    if ( missing(xx) ) {
      xx <- c( min(a[,k]), round(xref,1), xxx[2], xxx[3], xxx[4], max(a[,k]) );
    }
    axis(1, xx, ...);
  }
  
  if (length(conf.level) == 1) {
    matplot(
      x = a[jj,k],
      y = tmat[jj, ],
      type = "n",  # 'n' to not plot lines
      xaxt = "n",
      xlim = xlim, #a
      ylim = ylim, #a
      log = log, #a
      main = main, #a
      lty = lty, #a
      xlab = xlab,
      ylab = ylab
    );
    
    polygon(
      c(a[jj,k], rev(a[jj,k])),
      c(tmat[jj, 2], rev(tmat[jj, 3])),
      col = col.area[1],  
      border = NA
    );
    
    # Add lines
    matlines(
      x = a[jj,k],
      y = tmat[jj, ],
      lty = lty,  
      col = col   
    );
    
    # Definition of the x axes
    xxx <- round( seq(min(a[,k]), max(a[,k]),len=5) );
    if ( missing(xx) ) {
      xx <- c( min(a[,k]), round(xref,1), xxx[2], xxx[3], xxx[4], max(a[,k]) );
    }
    axis(1, xx, ...);
  }
  
  y <- c(0, 0);
  
  if ( missing(xlim) ) {
    v1 <- min(a[,k])+( max(a[,k])-min(a[,k]) )/10;
    v2 <- min(a[,k])+9*( max(a[,k])-min(a[,k]) )/10;
  } else {
    v1 <- min(xlim)+( max(xlim)-min(xlim) )/10;
    v2 <- min(xlim)+9*( max(xlim)-min(xlim) )/10;
  }
  if ( missing(ylim) ) {
    y[1] <- max(tmat[,3])/2;
    y[2] <- min(tmat[,2]);
  } else {
    y[1] <- max(ylim)/2;
    y[2] <- min(ylim);
  }
  if ( !missing(ref.label) ) {n.predictor <- ref.label;}
  if (xref > v1 & xref < v2) {
    arrows(xref, y[1], xref, y[2], length=0.08);
    ys <- y[1];
    if (ys > 2*y[1]-(2*y[1]-y[2])/10) {
      text(
        xref, y[1], paste( n.predictor, "=", round(xref, round.x) ),
        adj=c(0.5, 2.3), ...
      );
    } else {
      text(
        xref, y[1], paste( n.predictor, "=", round(xref, round.x) ),
        adj=c(0.5, -0.7), ...
      );
    }
  } else if (xref <= v1) {
    v3 <- ( max(xlim)-min(xlim) )/100;
    xref2 <- xref;
    if ( xref == min(xlim) ) {xref2 <- xref+min(0.05, v3);}
    arrows(xref2, y[1], xref2, y[2], length=0.08);
    ys <- y[1];
    if (ys > 2*y[1]-(2*y[1]-y[2])/10)  {
      text(
        xref, y[1], paste( n.predictor, "=", round(xref, round.x) ),
        adj=c(0, 2.3), ...
      );
    } else {
      text(
        xref, y[1], paste( n.predictor, "=", round(xref, round.x) ),
        adj=c(0, -0.7), ...
      );
    }
  } else if (xref >= v2) {
    v3 <- ( max(xlim)-min(xlim) )/100;
    xref2 <- xref;
    if ( xref == max(xlim) ) {xref2 <- xref-min(0.05, v3);}
    arrows(xref2, y[1], xref2, y[2], length=0.08);
    ys <- y[1];
    if (ys > 2*y[1]-(2*y[1]-y[2])/10) {
      text(
        xref, y[1], paste( n.predictor, "=", round(xref, round.x) ),
        adj=c(1, 2.3), ...
      );
    } else {
      text(
        xref, y[1], paste( n.predictor, "=", round(xref, round.x) ),
        adj=c(1, -0.7), ...
      );
    }
  }
  tmat2 <- tmat;
  if (length(conf.level) > 1) {
    tmat2 <- tmat;
    tmat2[,2:3] <- tmat[,4:5];
    tmat2[,4:5] <- tmat[,2:3]
  }
  return( invisible( list(estimates=tmat2, xref=xref) ) );
} # plot.OR