R/Utilities.R

Defines functions subgrp_perf subgrp_perf_pred cat_summary cut_perf fixcut_sur fixcut_con fixcut_bin gam_ctr_plot gam_plot scat_cont_plot roc_bin_plot roc_bin cdf_plot get_subgroup_results predictive_biomarker_imp isout

Documented in cat_summary cdf_plot cut_perf fixcut_bin fixcut_con fixcut_sur gam_ctr_plot gam_plot get_subgroup_results isout predictive_biomarker_imp roc_bin roc_bin_plot scat_cont_plot subgrp_perf subgrp_perf_pred

#' Tutorial Data
#'
#' A dataset containing sample data for demonstrating the functionalities of the BioPred package.
#'
#' @format A data frame with the following columns:
#' \describe{
#'   \item{x1}{Numeric. A biomarker variable.}
#'   \item{x2}{Numeric. A biomarker variable.}
#'   \item{x3}{Numeric. A biomarker variable.}
#'   \item{x4}{Numeric. A biomarker variable.}
#'   \item{x5}{Numeric. A biomarker variable.}
#'   \item{x6}{Numeric. A biomarker variable.}
#'   \item{x7}{Numeric. A biomarker variable.}
#'   \item{x8}{Numeric. A biomarker variable.}
#'   \item{x9}{Numeric. A biomarker variable.}
#'   \item{x10}{Numeric. A biomarker variable.}
#'   \item{y.con}{Numeric. A continuous outcome variable.}
#'   \item{y.bin}{Binary. A binary outcome variable, where 0 represents one class and 1 represents another class.}
#'   \item{y.time}{Numeric. The time in months, used for survival analysis.}
#'   \item{y.event}{Binary. Event indicator variable, where 0 indicates censoring and 1 indicates the event of interest occurred.}
#'   \item{subgroup_label}{Binary. Ground truth of subgroup label. In real-world scenarios, this information is typically unavailable.}
#'   \item{treatment}{Binary. Treatment indicator variable, where 0 represents control and 1 represents treatment.}
#'   \item{treatment_categorical}{Factor. A categorical version of the treatment variable, with levels "Placebo" and "Treatment".}
#'   \item{risk_category}{Factor.}
#' }
#'
#' @details
#' This dataset is used to illustrate various functions within the BioPred package, including predictive modeling and subgroup analysis. The columns represent different types of data typically encountered in clinical studies.
#'
#' @usage data(tutorial_data)
#'
#' @keywords datasets
#'
#' @examples
#' data(tutorial_data)
#' head(tutorial_data)
"tutorial_data"











#### outlier detection based on boxplot ####

#' Outlier Detection Based on Boxplot
#'
#' This function identifies outliers in a numeric vector based on the interquartile range (IQR) method used in boxplots.
#'
#' @param x A numeric vector of values for outlier detection.
#' @return A logical vector indicating which elements of the input vector are outliers (TRUE if an outlier, FALSE otherwise).
#' @keywords internal
isout <- function(x) {
  iqr <- stats::IQR(x, na.rm = TRUE)
  up <- stats::quantile(x, probs = 3/4, na.rm = TRUE)
  lo <- stats::quantile(x, probs = 1/4, na.rm = TRUE)
  up <- up + 1.5 * iqr
  lo <- lo - 1.5 * iqr
  out <- x > up | x < lo
  out
}



#' Plot Predictive Biomarker Importance based on XGBoost-based Subgroup Model
#'
#' This function calculates and plots the importance of biomarkers in a trained XGBoostSub_con, XGBoostSub_bin or XGBoostSub_sur model.
#'
#' @param model The trained XGBoost-based model.
#' @return A barplot showing the biomarker importance.
#' @import xgboost
#' @export
#' @examples
#' X_data <- matrix(rnorm(100 * 10), ncol = 10)  # 100 samples with 10 features
#' y_data <- rnorm(100)  # continuous outcome variable
#' trt <- sample(c(1, -1), 100, replace = TRUE)  # treatment indicator (1 or -1)
#' pi <- runif(100, min = 0.3, max = 0.7)  # propensity scores between 0 and 1
#'
#' # Define XGBoost parameters
#' params <- list(
#'   max_depth = 3,
#'   eta = 0.1,
#'   subsample = 0.8,
#'   colsample_bytree = 0.8
#' )
#'
#' # Train the model using A-learning loss
#' model_A <- XGBoostSub_con(
#'   X_data = X_data,
#'   y_data = y_data,
#'   trt = trt,
#'   pi = pi,
#'   Loss_type = "A_learning",
#'   params = params,
#'   nrounds = 5,
#'   disable_default_eval_metric = 1,
#'   verbose = TRUE
#' )
#' biomarker_imp=predictive_biomarker_imp(model_A)
predictive_biomarker_imp <- function(model) {
  # Calculate feature importance
  importance <- xgb.importance(model = model)

  # Plot feature importance
  graphics::barplot(importance$Gain,
          main = "Biomarker Importance",
          xlab = "Gain",
          ylab = "Biomarker",
          col = "skyblue",
          horiz = TRUE,
          names.arg = importance$Feature)

  # Return data frame containing feature importance
  importance_df <- data.frame(Biomarker = importance$Feature, Importance = importance$Gain)
  return(importance_df)
}




#' Get Subgroup Results
#'
#' This function predicts the treatment assignment for each patient based on a cutoff value.
#'
#' @param model The trained XGBoost-based subgroup model.
#' @param X_feature The data matrix containing patient features.
#' @param subgroup_label (Optional) The subgroup labels. In real-world data, this information is typically unknown and only available in simulated data. If provided, the prediction accuracy will also be returned.
#' @param cutoff The cutoff value for treatment assignment, defaulted to 0.5.
#' @return A data frame containing each subject and assigned treatment (1 for treatment, 0 for control). If subgroup labels are provided, it also returns the prediction accuracy of the subgroup labels.
#' @import xgboost
#' @export
#' @examples
#' X_data <- matrix(rnorm(100 * 10), ncol = 10)  # 100 samples with 10 features
#' y_data <- rnorm(100)  # continuous outcome variable
#' trt <- sample(c(1, -1), 100, replace = TRUE)  # treatment indicator (1 or -1)
#' pi <- runif(100, min = 0.3, max = 0.7)  # propensity scores between 0 and 1
#'
#' # Define XGBoost parameters
#' params <- list(
#'   max_depth = 3,
#'   eta = 0.1,
#'   subsample = 0.8,
#'   colsample_bytree = 0.8
#' )
#'
#' # Train the model using A-learning loss
#' model_A <- XGBoostSub_con(
#'   X_data = X_data,
#'   y_data = y_data,
#'   trt = trt,
#'   pi = pi,
#'   Loss_type = "A_learning",
#'   params = params,
#'   nrounds = 5,
#'   disable_default_eval_metric = 1,
#'   verbose = TRUE
#' )
#' subgroup_results=get_subgroup_results(model_A, X_data, subgroup_label=NULL, cutoff = 0.5)
get_subgroup_results <- function(model, X_feature, subgroup_label = NULL, cutoff = 0.5) {
  # Convert data to xgb.DMatrix
  if (is.null(subgroup_label)) {
    subgroup_label <- sample(c(0, 1), nrow(X_feature), replace = TRUE)
  }
  dtrain <- xgboost::xgb.DMatrix(data = as.matrix(X_feature), label = subgroup_label)
  # Predict treatment assignment for each patient
  pred <- stats::predict(model, dtrain)
  pred_prob <- 1.0/(1.0+exp(-pred))
  # Assign treatment based on the cutoff
  prediction <- ifelse(pred_prob > cutoff, 1, 0)

  acc <- NULL
  if (!is.null(subgroup_label)) {
    acc <- mean(subgroup_label == prediction)
  }
  # Create a data frame with patient ID and assigned treatment
  results <- data.frame(Patient_ID = seq_len(nrow(X_feature)),
                        Treatment_Assignment = prediction)

  if (!is.null(acc)) {
    return(list(assignment=results, acc = acc))
  }
  return(results)
}

#' CDF  Plot for a biomarker
#'
#' Cumulative Distribution Function (CDF) plot for a biomarker.
#'
#' @param xvar The biomarker name.
#' @param data The dataset.
#' @param y.int Increasement interval on the y.
#' @param xlim cdf plot range for xvar, when NULL, c(min(x), max(x)) will be used.
#' @param xvar.display Display name of the biomarker.
#' @param group A separate CDF line will be plotted for each group.
#' @return A ggplot object representing the CDF inverse plot.
#' @import ggplot2
#' @export
#' @examples
#' # Load a sample dataset
#'   data <- data.frame(
#'   biomarker = rnorm(100, mean = 50, sd = 10),
#'   group = sample(c("Group A", "Group B"), 100, replace = TRUE)
#' )
#'
#' # Basic CDF plot for a single biomarker without groups
#'   cdf_plot(
#'   xvar = "biomarker",
#'   data = data,
#'   y.int = 10,
#'   xlim = c(30, 70),
#'   xvar.display = "Biomarker Level"
#' )
#' # CDF plot for a biomarker with groups
#' cdf_plot(
#'   xvar = "biomarker",
#'   data = data,
#'   y.int = 10,
#'   xlim = c(30, 70),
#'   xvar.display = "Biomarker Level",
#'   group = "group"
#' )
cdf_plot = function(xvar, data, y.int=5, xlim=NULL, xvar.display=xvar, group=NULL){
   num=NULL
   if(is.null(group)||is.na(group)){
    x = data[,xvar]
    x = x[!is.na(x)]

    if(is.null(xlim)) xlim=c(min(x),max(x))

    plot.data = NULL
    x.axis.vals = sort(unique(c(seq(from=xlim[1],to=xlim[2],length.out=1000), x)))
    for(x.uni.i in x.axis.vals){
      plot.data.i = data.frame(x=x.uni.i, num = mean(x<=x.uni.i)*100)
      plot.data = rbind(plot.data, plot.data.i)
    }


    fig = ggplot(data=plot.data, aes(x=x, y=num)) +
      geom_line(size=0.8) +
      labs(title=paste("Cumulative Distribution of", xvar.display) , x=xvar.display, y="% of Subjects <= Cutoff")+
      scale_y_continuous(breaks=seq(0, 100, y.int),limits = c(0,100))+
      scale_x_continuous(breaks=round(seq(from=xlim[1],to=xlim[2],length.out=11), digits=2))+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))

  }else{
    data = data[,c(xvar, group)]
    data = data[stats::complete.cases(data),]

    if(is.null(xlim)) xlim=c(min(data[,xvar]), max(data[,xvar]))

    plot.data = NULL
    x.axis.vals = sort(unique(c(seq(from=xlim[1],to=xlim[2],length.out=1000), data[,xvar])))

    for(grp.i in unique(data[,group])){
      x.i = data[data[,group]==grp.i, xvar]
      for(x.uni.i in x.axis.vals){
        plot.data.i = data.frame(x=x.uni.i, num = mean(x.i<=x.uni.i)*100, group=grp.i)
        plot.data = rbind(plot.data, plot.data.i)
      }
    }

    fig = ggplot2::ggplot(data=plot.data, aes(x=x, y=num, group=group, colour=group)) +
      geom_line(size=0.8) +
      labs(title=paste("Cumulative Distribution of", xvar.display) , x=xvar.display, y="% of Subjects <= Cutoff", colour = group)+
      scale_y_continuous(breaks=seq(0, 100, y.int),limits = c(0,100))+
      scale_x_continuous(breaks=round(seq(from=xlim[1],to=xlim[2],length.out=11), digits=2))+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))
  }
  fig
}

#' AUC ROC Table for Biomarkers Associated with Binary Outcomes
#'
#' Computes the area under the receiver operating characteristic (ROC) curve for Biomarkers Associated with Binary Outcomes,
#' and returns the results as a table.
#'
#' @param yvar Binary response variable name, where 0 represents controls and 1 represents cases.
#' @param xvars A vector of biomarker names.
#' @param dirs A vector of directions for the biomarkers. Options are "auto", ">", or "<".
#'   - "auto" (default): automatically determines in which group the median is higher and takes the direction accordingly.
#'   - ">": indicates that the biomarkers for the control group are higher than those for the case group (controls > t >= cases).
#'   - "<": indicates that the biomarkers for the control group are lower or equal to those for the case group (controls < t <= cases).
#' @param data The dataset containing the variables.
#' @param yvar.display Display name for the binary response variable.
#' @param xvars.display Display names for the biomarkers.
#' @return A table containing the AUC values for each biomarker.
#' @import pROC
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   outcome = sample(c(0, 1), 100, replace = TRUE),
#'   biomarker1 = rnorm(100, mean = 0, sd = 1),
#'   biomarker2 = rnorm(100, mean = 5, sd = 2)
#' )
#'
#' # Compute AUC for a single biomarker with auto direction
#' roc_bin(
#'   yvar = "outcome",
#'   xvars = "biomarker1",
#'   dirs = "auto",
#'   data = data,
#'   yvar.display = "Binary Outcome",
#'   xvars.display = "Biomarker 1"
#' )
#'
#' # Compute AUC for multiple biomarkers with specified directions
#' roc_bin(
#'   yvar = "outcome",
#'   xvars = c("biomarker1", "biomarker2"),
#'   dirs = c("auto", "<"),
#'   data = data,
#'   yvar.display = "Binary Outcome",
#'   xvars.display = c("Biomarker 1", "Biomarker 2")
#' )
roc_bin = function(yvar, xvars, dirs, data, yvar.display=yvar, xvars.display=xvars){

  if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")
  if(length(xvars)!=length(dirs)) stop("xvars and dirs should be of the same length.")
  data[,yvar] = factor(data[,yvar], levels=c(0, 1))

  res.all = NULL
  for(i in 1:length(xvars)){
    xvar = xvars[i]
    dir = dirs[i]

    if(dir=="<") {
      dir=">"
    }else if(dir==">") {
      dir="<"
    }

    data.temp = data[,c(yvar,xvar)]
    data.temp = data.temp[stats::complete.cases(data.temp),]

    fml = stats::as.formula(paste(yvar,"~", xvar))
    res.i = roc(fml, data=data.temp, percent=F, na.rm=T, direction=dir, algorithm=1, quiet=T, smooth=F, auc=T, ci=F, plot=F)
    if(res.i$direction == ">") {
      res.i$direction = "<"
    }else if(res.i$direction == "<") {
      res.i$direction = ">"
    }
    res.i = data.frame(Response = yvar.display, Predictor = xvars.display[i], direction=paste("case",res.i$direction,"ctrl"),
                       AUC = as.numeric(res.i$auc))
    res.all = rbind(res.all, res.i)

  }

  res.all
}

#' ROC Plot Biomarkers Associated with Binary Outcomes
#'
#' Generates ROC plots for different biomarkers associated with binary outcomes.
#'
#' @param yvar Binary response variable name, where 0 represents controls and 1 represents cases.
#' @param xvars A vector of biomarker names.
#' @param dirs A vector of directions for the biomarkers. Options are "auto", ">", or "<".
#'   - "auto" (default): automatically determines in which group the median is higher and takes the direction accordingly.
#'   - ">" indicates that the biomarkers for the control group are higher than those for the case group (controls > t >= cases).
#'   - "<" indicates that the biomarkers for the control group are lower or equal to those for the case group (controls < t <= cases).
#' @param data The dataset containing the variables.
#' @param yvar.display Display name for the binary response variable.
#' @param xvars.display Display names for the biomarkers.
#' @return ROC plots for different biomarkers associated with binary outcomes.
#' @import pROC
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   outcome = sample(c(0, 1), 100, replace = TRUE),
#'   biomarker1 = rnorm(100, mean = 0, sd = 1),
#'   biomarker2 = rnorm(100, mean = 5, sd = 2)
#' )
#'
#' # Generate ROC plot for a single biomarker with auto direction
#' roc_bin_plot(
#'   yvar = "outcome",
#'   xvars = "biomarker1",
#'   dirs = "auto",
#'   data = data,
#'   yvar.display = "Binary Outcome",
#'   xvars.display = "Biomarker 1"
#' )
#'
#' # Generate ROC plots for multiple biomarkers with specified directions
#' roc_bin_plot(
#'   yvar = "outcome",
#'   xvars = c("biomarker1", "biomarker2"),
#'   dirs = c("auto", "<"),
#'   data = data,
#'   yvar.display = "Binary Outcome",
#'   xvars.display = c("Biomarker 1", "Biomarker 2")
#' )
roc_bin_plot = function(yvar, xvars, dirs, data, yvar.display=yvar, xvars.display=xvars){

  if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")
  if(length(xvars)!=length(dirs)) stop("xvars and dirs should be of the same length.")
  data[,yvar] = factor(data[,yvar], levels=c(0, 1))

  for(i in 1:length(xvars)){
    xvar = xvars[i]
    dir = dirs[i]

    if(dir=="<") {
      dir=">"
    }else if(dir==">") {
      dir="<"
    }

    data.temp = data[,c(yvar,xvar)]
    data.temp = data.temp[stats::complete.cases(data.temp),]

    fml = stats::as.formula(paste(yvar,"~", xvar))
    res.i = roc(fml, data=data.temp, percent=F, na.rm=T, direction=dir, algorithm=1, quiet=T, smooth=F, auc=T, ci=F, plot=F)
    plot.roc(res.i, add=F, reuse.auc=T, axes=T, print.auc=T, grid=T, main=paste(yvar.display, "~", xvars.display[i]))
  }

}



#' Scatter Plot for a Biomarker Associated with Continuous Outcome
#'
#' Generates a scatter plot for exploring the relationship between a continuous response variable
#' and a biomarker variable.
#'
#' @param yvar Continuous response variable name.
#' @param xvar biomarker name.
#' @param data The dataset containing the variables.
#' @param ybreaks Breaks on the y-axis.
#' @param xbreaks Breaks on the x-axis.
#' @param yvar.display Display name for the response variable.
#' @param xvar.display Display name for the biomarker variable.
#' @return A list containing correlation coefficients, scatter plot, slope, and intercept.
#' @export
#' @examples
#' data <- data.frame(
#'   outcome = rnorm(100, mean = 10, sd = 2),
#'   biomarker = rnorm(100, mean = 0, sd = 1)
#' )
#'
#' # Generate a scatter plot with default axis breaks
#' scat_cont_plot(
#'   yvar = "outcome",
#'   xvar = "biomarker",
#'   data = data,
#'   yvar.display = "Continuous Outcome",
#'   xvar.display = "Biomarker Level"
#' )
#'
#' # Generate a scatter plot with specified axis breaks
#' scat_cont_plot(
#'   yvar = "outcome",
#'   xvar = "biomarker",
#'   data = data,
#'   ybreaks = seq(5, 15, by = 1),
#'   xbreaks = seq(-2, 2, by = 0.5),
#'   yvar.display = "Continuous Outcome",
#'   xvar.display = "Biomarker Level"
#' )
scat_cont_plot = function(yvar, xvar, data, ybreaks=NULL, xbreaks=NULL, yvar.display=yvar, xvar.display=xvar){
  data = data[,c(yvar,xvar)]
  data = data[stats::complete.cases(data),]
  names(data) = c("y","x")

  y=data[["y"]]
  x=data[["x"]]
  N = length(x)

  ## pearson ##
  cor.pears = round(stats::cor(x, y, use="complete.obs",method="pearson"),digits=2)
  cor.pears.p = round(stats::cor.test(x, y, alternative="two.sided", method="pearson")$p.value, digits=4)
  ## spearman ##
  cor.spear = round(stats::cor(x, y, use="complete.obs",method="spearman"),digits=2)
  cor.spear.p = round(stats::cor.test(x, y, alternative="two.sided", method="spearman")$p.value, digits=4)

  lm.fit = stats::lm(y~x)
  slope = summary(lm.fit)$coefficients["x","Estimate"]
  intercept = summary(lm.fit)$coefficients["(Intercept)","Estimate"]
  jitter.width = (max(x)-min(x))/80
  jitter.height = (max(y)-min(y))/80


  if(is.null(xbreaks)) xbreaks=round(seq(min(x), max(x), length.out = 10),digits=2)
  if(is.null(ybreaks)) ybreaks=round(seq(min(y), max(y), length.out = 10),digits=2)

  scat.plot = ggplot(data, aes(x=x, y=y)) +
    geom_point(size=3,shape=1, stroke=1.5,position=position_jitter(width=jitter.width,height=jitter.height))+
    theme_bw(base_size=15)+
    theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))+
    geom_abline(aes(slope=slope,intercept =intercept, color="Fitted Line"), size=0.8)+
    labs(color="") +
    xlab(xvar.display)+
    ylab(yvar.display)+
    scale_y_continuous(breaks=ybreaks)+
    scale_x_continuous(breaks=xbreaks)+
    ggtitle(paste(xvar.display," vs. ",yvar.display, " (N = ", N, ")\nCor.Pearson = ",cor.pears,
                  ", Cor.Spearman = ",cor.spear, "\nPearson.p = ",cor.pears.p,
                  ", Spearman.p = ",cor.spear.p, sep=""))+
    theme(plot.title = element_text(hjust = 0.5, size=15))


  list(Cor.Pearson = cor.pears, Cor.Spearman = cor.spear, fig=scat.plot, slope=slope, intercept=intercept)


}



#' GAM Plot
#'
#' Generates a generalized additive model (GAM) plot for exploring the relationship between a response
#' variable and a biomarker.
#'
#' @param yvar Response variable name.
#' @param censorvar Censoring variable name for survival analysis (0-censored, 1-event).
#' @param xvar Biomarker name.
#' @param xvars.adj Potential confounding variables to adjust for using linear terms.
#' @param sxvars.adj Potential confounding variables to adjust for using curve terms.
#' @param type "c" for continuous, "s" for survival, and "b" for binary response.
#' @param data The dataset containing the variables.
#' @param k Upper limit on the degrees of freedom associated with an s smooth.
#' @param pred.type "iterms" for trend of xvar, "response" for Y at the original scale.
#' @param link.scale Whether to show the plot in the scale of the link function.
#' @param title Title of the plot.
#' @param ybreaks Breaks on the y-axis.
#' @param xbreaks Breaks on the x-axis.
#' @param rugcol.var Variable name defining the color of the rug and points.
#' @param add.points Whether to add data points to the plot.
#' @param prt.sum Whether to print summary or not.
#' @param prt.chk Whether to print model diagnosis.
#' @param outlier.rm Whether to remove outliers based on 1.5IQR.
#' @param newdat User-supplied customized data for prediction and plotting.
#' @return A list containing p-table, s-table, GAM summary, GAM check, and the plot.
#' @import mgcv
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   response = rnorm(100),
#'   biomarker = rnorm(100, mean = 50, sd = 10),
#'   censor = sample(c(0, 1), 100, replace = TRUE),
#'   age = rnorm(100, mean = 60, sd = 10),
#'   group = sample(c("Group A", "Group B"), 100, replace = TRUE)
#' )
#'
#' # Generate a GAM plot for a continuous response variable
#' gam_plot(
#'   yvar = "response",
#'   xvar = "biomarker",
#'   type = "c",
#'   data = data,
#'   xvars.adj = "age",
#'   sxvars.adj = NULL,
#'   k = 5,
#'   pred.type = "iterms",
#'   title = "GAM Plot of Biomarker and Response"
#' )
#'
#' # Generate a GAM plot for survival analysis
#' gam_plot(
#'   yvar = "response",
#'   censorvar = "censor",
#'   xvar = "biomarker",
#'   type = "s",
#'   data = data,
#'   k = 5,
#'   title = "GAM Survival Plot for Biomarker"
#' )
#'
#' # Generate a GAM plot for a binary response variable
#' data$binary_response <- as.numeric(data$response > 0)
#' gam_plot(
#'   yvar = "binary_response",
#'   xvar = "biomarker",
#'   type = "b",
#'   data = data,
#'   k = 5,
#'   pred.type = "response",
#'   title = "GAM Plot for Binary Response"
#' )
gam_plot = function(yvar, censorvar=NULL, xvar, xvars.adj=NULL, sxvars.adj=NULL, type, data, k,
                    pred.type="iterms", link.scale=TRUE, title="Trend Plot", ybreaks=NULL, xbreaks=NULL,
                    rugcol.var=NULL, add.points=FALSE, prt.sum=TRUE, prt.chk=FALSE, outlier.rm=FALSE, newdat=NULL){
  x=NULL
  yfit=NULL
  yfit.Lo=NULL
  yfit.Up=NULL
  rug=NULL
  rugcol=NULL
  y=NULL

  if(!pred.type%in%c("iterms","response")) stop("pred.type can only be iterms or response.")
  if(type=="s"){
    if(is.null(censorvar)) stop("Censoring variable missing!")
  }else if(type=="c"){
    type="c"
  }else if(type=="b"){
    if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")
  }else{
    stop("type can only be s, c or b.")
  }

  data = data[, c(yvar,censorvar,xvar, xvars.adj, sxvars.adj, rugcol.var)]
  data = data[stats::complete.cases(data[, c(yvar,censorvar,xvar, xvars.adj, sxvars.adj)]),]
  if(outlier.rm){
    data = data[!isout(data[,xvar]),]
  }


  if(is.null(xvars.adj)){
    par.fml = NULL
  }else{
    par.fml = paste(xvars.adj,collapse=" + ", sep="")
    par.fml = paste(par.fml,"+", sep="")
  }

  if(is.null(sxvars.adj)) {
    s.fml = NULL
  }else{
    s.fml = paste("s(",sxvars.adj,",k=",k,")",collapse="+",sep="")
    s.fml = paste(s.fml,"+",sep="")
  }


  s.fml = paste(s.fml,"s(",xvar,",k=",k,")",sep="")

  fml = stats::as.formula(paste(yvar,"~",par.fml,s.fml))

  if(type=="s"){
    res.gam = gam(fml, family=cox.ph(), data=data, weights=data[,censorvar], method="REML", select=F)
  }else if(type=="c"){
    res.gam = gam(fml, family=stats::gaussian(), data=data, method="REML", select=F)
  }else if(type=="b"){
    res.gam = gam(fml, family=stats::binomial(link = "logit"), data=data, method="REML", select=F)
  }

  ptable = summary(res.gam)$p.table
  stable = summary(res.gam)$s.table

  ptable.display = ptable
  if(!is.null(ptable.display)) ptable.display = round(ptable.display, digits=4)

  stable.display = stable
  if(!is.null(stable.display)) stable.display = round(stable.display, digits=4)

  if(prt.sum) {
    cat("GAM SUMMARY")
    print(summary(res.gam))
    cat("\n")
  }
  if(prt.chk) {
    cat("GAM CHECK")
    gam.check(res.gam)
    cat("\n")

  }

  if(is.null(newdat)){
    newdat = data.frame(x=seq(min(data[,xvar]), max(data[,xvar]), length.out=999))
    names(newdat)= xvar
    for(i in c(xvars.adj,sxvars.adj)){
      newdat[,i] = stats::median(data[,i])
    }
  }


  if(pred.type=="iterms"){
    x.term = paste("s(",xvar,")",sep="")
    dat.pred = stats::predict(object=res.gam, newdata=newdat, type="iterms",se.fit=T,
                       terms=x.term)

    plot.data = data.frame(yfit = dat.pred$fit[,x.term],
                           yfit.Up = dat.pred$fit[,x.term]+1.96*dat.pred$se.fit[,x.term],
                           yfit.Lo = dat.pred$fit[,x.term]-1.96*dat.pred$se.fit[,x.term], x=newdat[,xvar])

    if(type=="c"){
      ylab = "Mean Difference from Population Average"
      if(!link.scale) message("link.scale is ignored for type==c as link.scale can only be T for type==c.")
    }else if(type=="b"){
      ylab = "Log(OR) against Population Average"
      if(!link.scale){
        plot.data = data.frame(yfit = exp(plot.data$yfit), yfit.Up = exp(plot.data$yfit.Up),
                               yfit.Lo = exp(plot.data$yfit.Lo), x=plot.data[,"x"])
        ylab="OR against Population Average"
      }

    }else if(type=="s"){
      ylab = "Log(HR) against Population Average"
      if(!link.scale){
        plot.data = data.frame(yfit = exp(plot.data$yfit), yfit.Up = exp(plot.data$yfit.Up),
                               yfit.Lo = exp(plot.data$yfit.Lo), x=plot.data[,"x"])
        ylab="HR against Population Average"
      }

    }

  }else if(pred.type=="response"){

    if(type=="s") stop("time to event cannnot do pred.type=response.")


    dat.pred = stats::predict(object=res.gam, newdata=newdat, type="response",se.fit=T)

    plot.data = data.frame(yfit = as.vector(dat.pred$fit),
                           yfit.Up = as.vector(dat.pred$fit)+1.96*as.vector(dat.pred$se.fit),
                           yfit.Lo = as.vector(dat.pred$fit)-1.96*as.vector(dat.pred$se.fit),
                           x=newdat[,xvar])
    ylab = yvar
  }



  if(is.null(xbreaks)) xbreaks=round(seq(min(plot.data$x), max(plot.data$x), length.out = 10),digits=2)
  if(is.null(ybreaks)) ybreaks=round(seq(min(plot.data$yfit.Lo), max(plot.data$yfit.Up), length.out = 10),digits=2)

  if(is.null(rugcol.var)){
    data.rug = data.frame(rug=data[,xvar], rugcol="Individual Record", y=data[,yvar])
  }else{
    data.rug = data.frame(rug=data[,xvar], rugcol=data[,rugcol.var], y=data[,yvar])
  }

  rugcol.levels = as.character(sort(unique(data.rug$rugcol), na.last = T))
  rugcol.levels[is.na(rugcol.levels)] = "N/A"
  data.rug$rugcol = as.character(data.rug$rugcol)
  data.rug$rugcol[is.na(data.rug$rugcol)] = "N/A"
  data.rug$rugcol=factor(data.rug$rugcol, levels=rugcol.levels)

  if(pred.type=="response" & add.points){
    fig=ggplot(data=plot.data, aes(x=x, y=yfit)) +
      geom_ribbon(aes(ymin=yfit.Lo, ymax=yfit.Up),alpha=0.2) +
      geom_line(size=0.8)+
      geom_rug(aes(x=rug, colour=rugcol),data=data.rug, alpha=0.7, sides="b", inherit.aes=F)+
      geom_point(aes(x=rug, y=y, colour=rugcol),data=data.rug, size=2.5, inherit.aes=F)+
      labs(color="") +
      ylab(ylab)+
      xlab(xvar)+
      scale_y_continuous(breaks=ybreaks)+
      scale_x_continuous(breaks=xbreaks)+
      ggtitle(title)+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))


  }else{
    fig=ggplot(data=plot.data, aes(x=x, y=yfit)) +
      geom_ribbon(aes(ymin=yfit.Lo, ymax=yfit.Up),alpha=0.2) +
      geom_line(size=0.8)+
      geom_rug(aes(x=rug, colour=rugcol),data=data.rug, alpha=0.7, sides="b", inherit.aes=F)+
      labs(color="") +
      ylab(ylab)+
      xlab(xvar)+
      scale_y_continuous(breaks=ybreaks)+
      scale_x_continuous(breaks=xbreaks)+
      ggtitle(title)+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))
  }



  res = list(ptable=ptable, ptable.display=ptable.display, stable=stable, stable.display=stable.display,
             fig=fig, plot.data = plot.data)
  res
}





#' GAM Contrast Plot
#'
#' Computes and plots the contrasts between treatment and control group based on a GAM for exploring the relationship be-tween treatment benefit and biomarker.
#'
#' @param yvar Response variable name.
#' @param censorvar Censoring variable name (0-censored, 1-event). Required if type is "s" (survival).
#' @param xvar Biomarker name.
#' @param xvars.adj Potential confounding variables to adjust for using linear terms.
#' @param sxvars.adj Potential confounding variables to adjust for using curves.
#' @param trtvar Treatment variable that the contrast will build upon (treatment-control).
#' @param type Type of response variable. Options are "c" for continuous, "s" for survival, and "b" for binary response.
#' @param data The dataset containing the variables.
#' @param k Upper limit on the degrees of freedom associated with an s smooth.When this k is too large, program will report error saying
#   "Model has more coefficients than data."
#' @param title Title of the plot.
#' @param ybreaks Breaks on the y-axis.
#' @param xbreaks Breaks on the x-axis.
#' @param rugcol.var Variable name that defines the color of the rug.
#' @param link.scale Whether to show the plot (y-axis) in the scale of the link function (linear predictor).
#' @param prt.sum Whether to print summary or not.
#' @param prt.chk Whether to print model diagnosis.
#' @param outlier.rm Whether to remove outliers based on 1.5IQR.
#' @return A list containing the p-value table, summarized p-value table, s-value table, summarized s-value table, and the plot.
#' @import mgcv
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   response = rnorm(100),
#'   biomarker = rnorm(100, mean = 50, sd = 10),
#'   censor = sample(c(0, 1), 100, replace = TRUE),
#'   treatment = sample(c(0, 1), 100, replace = TRUE),
#'   age = rnorm(100, mean = 60, sd = 10),
#'   group = sample(c("Group A", "Group B"), 100, replace = TRUE)
#' )
#'
#' # Generate a GAM contrast plot for a continuous response variable
#' gam_ctr_plot(
#'   yvar = "response",
#'   xvar = "biomarker",
#'   trtvar = "treatment",
#'   type = "c",
#'   data = data,
#'   xvars.adj = "age",
#'   k = 5,
#'   title = "GAM Contrast Plot for Treatment vs. Control"
#' )
#'
#' # Generate a GAM contrast plot for survival analysis
#' gam_ctr_plot(
#'   yvar = "response",
#'   censorvar = "censor",
#'   xvar = "biomarker",
#'   trtvar = "treatment",
#'   type = "s",
#'   data = data,
#'   k = 5,
#'   title = "GAM Contrast Plot for Survival Data"
#' )
#'
#' # Generate a GAM contrast plot for a binary response variable
#' data$binary_response <- as.numeric(data$response > 0)
#' gam_ctr_plot(
#'   yvar = "binary_response",
#'   xvar = "biomarker",
#'   trtvar = "treatment",
#'   type = "b",
#'   data = data,
#'   k = 5,
#'   title = "GAM Contrast Plot for Binary Outcome"
#' )
gam_ctr_plot = function(yvar, censorvar=NULL, xvar, xvars.adj=NULL, sxvars.adj=NULL, trtvar=NULL, type,
                        data, k, title="Group Contrast", ybreaks=NULL, xbreaks=NULL, rugcol.var=NULL,
                        link.scale=TRUE, prt.sum=TRUE, prt.chk=FALSE, outlier.rm=FALSE){
   x=NULL
   yfit=NULL
   yfit.Lo=NULL
   yfit.Up=NULL
   rug=NULL
   rugcol=NULL

  if(type=="s"){
    if(is.null(censorvar)) stop("Censoring variable missing!")
  }else if(type=="c"){
    type="c"
  }else if(type=="b"){
    if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")
  }else{
    stop("type can only be s, c or b.")
  }

  if(is.null(trtvar)) stop("trtvar is missing. Contrast is between treatments.")
  if(!is.numeric(data[,trtvar])) stop("Column of trtvar should be a numeric 0/1 vector.")
  if(!all(data[,trtvar]%in%c(0,1,NA)) || !all(c(0,1)%in%data[,trtvar])) stop("Column of trtvar needs to be 0/1.")


  data = data[, c(yvar,censorvar, xvar, xvars.adj, sxvars.adj, trtvar, rugcol.var)]
  data = data[stats::complete.cases(data[,c(yvar,censorvar, xvar, xvars.adj, sxvars.adj, trtvar)]),]

  if(outlier.rm){
    data = data[!isout(data[,xvar]),]
  }



  if(type!="s"){
    if("intercept"%in%names(data)) stop("Variable names should not include 'intercept'.")
    data$intercept = 1
    xvars.adj = c("intercept", xvars.adj)
  }


  if(is.null(xvars.adj)){
    par.fml = NULL
  }else{
    par.fml = paste(xvars.adj,collapse="+", sep="")
    par.fml = paste(par.fml,"+", sep="")
  }
  par.fml = paste(par.fml, trtvar, "+", sep="")


  if(is.null(sxvars.adj)) {
    s.fml = NULL
  }else{
    s.fml = paste("s(",sxvars.adj,",k=",k,")",collapse="+",sep="")
    s.fml = paste(s.fml,"+",sep="")
  }


  s.fml = paste(s.fml,"s(",xvar,",k=",k,")+s(",xvar,",k=",k, ",by=",trtvar,", pc=",
                mean(data[,xvar]),")",sep="")
  if(type!="s"){
    fml = stats::as.formula(paste(yvar,"~ -1+",par.fml,s.fml, sep=""))
  }else{
    fml = stats::as.formula(paste(yvar,"~",par.fml,s.fml, sep=""))
  }


  if(type=="s"){
    res.gam = gam(fml, family=cox.ph(), data=data, weights=data[,censorvar], method="REML", select=F)
  }else if(type=="c"){
    res.gam = gam(fml, family=stats::gaussian(), data=data, method="REML", select=F)
  }else if(type=="b"){
    res.gam = gam(fml, family=stats::binomial(link="logit"), data=data, method="REML", select=F)
  }

  ptable = summary(res.gam)$p.table
  stable = summary(res.gam)$s.table

  ptable.display = ptable
  if(!is.null(ptable.display)) ptable.display = round(ptable.display, digits=4)

  stable.display = stable
  if(!is.null(stable.display)) stable.display = round(stable.display, digits=4)

  if(prt.sum) {
    cat("GAM SUMMARY")
    print(summary(res.gam))
    cat("\n")
  }
  if(prt.chk) {
    cat("GAM CHECK")
    gam.check(res.gam)
    cat("\n")

  }

  newdat = data.frame(x=seq(min(data[,xvar]), max(data[,xvar]), length.out=1000), trt=1)
  names(newdat)= c(xvar, trtvar)
  for(i in xvars.adj){
    newdat[,i] = 0
  }
  for(i in sxvars.adj){
    newdat[,i] = stats::median(data[,i])
  }

  exclude.term = c(paste("s(",sxvars.adj,")",sep=""), paste("s(",xvar,")",sep=""))
  dat.pred = stats::predict(object=res.gam, newdata=newdat, type="link", se.fit=T, exclude=exclude.term)
  plot.data = data.frame(yfit = dat.pred$fit, yfit.Up = dat.pred$fit+1.96*dat.pred$se.fit,
                         yfit.Lo = dat.pred$fit-1.96*dat.pred$se.fit, x=newdat[,xvar])
  avg.line = ptable[trtvar,"Estimate"]


  if(type=="c"){
    ylab = "Mean Difference"
  }else if(type=="b"){
    ylab = "Log(OR)"
    if(!link.scale){
      plot.data = data.frame(yfit = exp(plot.data$yfit), yfit.Up = exp(plot.data$yfit.Up),
                             yfit.Lo = exp(plot.data$yfit.Lo), x=plot.data[,"x"])
      avg.line = exp(avg.line)
      ylab="OR"
    }
  }else if(type=="s"){
    ylab="Log(HR)"
    if(!link.scale){
      plot.data = data.frame(yfit = exp(plot.data$yfit), yfit.Up = exp(plot.data$yfit.Up),
                             yfit.Lo = exp(plot.data$yfit.Lo), x=plot.data[,"x"])
      avg.line = exp(avg.line)
      ylab="HR"
    }
  }

  if(is.null(xbreaks)) xbreaks=round(seq(min(plot.data$x), max(plot.data$x), length.out = 10),digits=2)
  if(is.null(ybreaks)) ybreaks=round(seq(min(plot.data$yfit.Lo), max(plot.data$yfit.Up), length.out = 10),digits=2)

  if(is.null(rugcol.var)){
    data.rug = data.frame(rug=data[,xvar], rugcol="Individual Record")
  }else{
    data.rug = data.frame(rug=data[,xvar], rugcol=data[,rugcol.var])
  }

  rugcol.levels = as.character(sort(unique(data.rug$rugcol), na.last = T))
  rugcol.levels[is.na(rugcol.levels)] = "N/A"
  data.rug$rugcol = as.character(data.rug$rugcol)
  data.rug$rugcol[is.na(data.rug$rugcol)] = "N/A"
  data.rug$rugcol=factor(data.rug$rugcol, levels=rugcol.levels)


  fig=ggplot(data=plot.data, aes(x=x, y=yfit)) +
    geom_ribbon(aes(ymin=yfit.Lo, ymax=yfit.Up),alpha=0.2) +
    geom_line(size=0.8)+
    geom_abline(aes(slope=0,intercept =avg.line, color="Mean"), linetype="dashed",size=0.9)+
    geom_rug(aes(x=rug, colour=rugcol),data=data.rug, alpha=0.7, sides="b", inherit.aes=F)+
    labs(color="") +
    ylab(ylab)+
    xlab(xvar)+
    scale_y_continuous(breaks=ybreaks)+
    scale_x_continuous(breaks=xbreaks)+
    ggtitle(title)+
    theme_bw(base_size=15)+
    theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))


  res = list(ptable=ptable, ptable.display=ptable.display, stable=stable, stable.display=stable.display, fig=fig)

  res


}

#' Fixed Cutoff Analysis for Individual Biomarker Associated with Binary Outcome Variables
#'
#' This function conducts fixed cutoff analysis for individual biomarker associated with binary outcome variables.
#'
#' @param yvar Binary response variable name. 0 represents controls and 1 represents cases.
#' @param xvar Biomarker name.
#' @param dir Cutoff direction for the desired subgroup. Options are ">", ">=", "<", or "<=".
#' @param cutoffs A vector of candidate cutoffs.
#' @param data The dataset containing the variables.
#' @param method Method for cutoff selection. Options are "Fisher", "Youden", "Conc.Prob", "Accuracy", or "Kappa".
#'        - "Fisher": Minimizes the Fisher test p-value.
#'        - "Youden": Maximizes the Youden index.
#'        - "Conc.Prob": Maximizes sensitivity * specificity.
#'        - "Accuracy": Maximizes accuracy.
#'        - "Kappa": Maximizes Kappa coefficient.
#' @param yvar.display Display name of the response variable.
#' @param xvar.display Display name of the predictor variable.
#' @param vert.x Whether to display the cutoff in a 90-degree angle when plotting (saves space).
#' @return A list containing statistical summaries, selected cutoff statistics, selected cutoff value, confusion matrix,
#'         and a ggplot object for visualization.
#' @import PropCIs
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   outcome = sample(c(0, 1), 100, replace = TRUE),
#'   biomarker = rnorm(100, mean = 0, sd = 1)
#' )
#'
#' # Perform fixed cutoff analysis using the "Fisher" method for a biomarker
#' fixcut_bin(
#'   yvar = "outcome",
#'   xvar = "biomarker",
#'   dir = ">",
#'   cutoffs = seq(-2, 2, by = 0.5),
#'   data = data,
#'   method = "Fisher",
#'   yvar.display = "Binary Outcome",
#'   xvar.display = "Biomarker Level",
#'   vert.x = TRUE
#' )
#'
#' # Perform fixed cutoff analysis using the "Youden" method
#' fixcut_bin(
#'   yvar = "outcome",
#'   xvar = "biomarker",
#'   dir = "<",
#'   cutoffs = seq(-2, 2, by = 0.5),
#'   data = data,
#'   method = "Youden",
#'   yvar.display = "Binary Outcome",
#'   xvar.display = "Biomarker Level",
#'   vert.x = FALSE
#' )
#'
#' # Perform fixed cutoff analysis using "Accuracy" method with different direction
#' fixcut_bin(
#'   yvar = "outcome",
#'   xvar = "biomarker",
#'   dir = ">=",
#'   cutoffs = c(-1, 0, 1),
#'   data = data,
#'   method = "Accuracy",
#'   yvar.display = "Binary Outcome",
#'   xvar.display = "Biomarker Level",
#'   vert.x = TRUE
#' )
fixcut_bin = function(yvar, xvar, dir, cutoffs, data, method="Fisher", yvar.display=yvar, xvar.display=xvar, vert.x=FALSE){
  Cutoff=NULL
  Value=NULL
  Measure=NULL
  Lower=NULL
  Upper=NULL
  data = data[,c(xvar, yvar)]
  data = data[stats::complete.cases(data),]
  row.names(data)=NULL
  if(!dir%in%c(">", "<", ">=", "<=")) stop("dir should be >, <, >=, or <=")
  if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")

  cutoffs = sort(unique(cutoffs))

  x=data[,xvar]
  y=data[,yvar]
  y=factor(y,levels=c(0,1))

  stat = NULL
  for(i in 1:length(cutoffs)){
    if(dir == "<"){
      pred = factor((x<cutoffs[i])*1,levels=c(0,1))
    }else if(dir == "<="){
      pred = factor((x<=cutoffs[i])*1,levels=c(0,1))
    }else if(dir == ">"){
      pred = factor((x>cutoffs[i])*1,levels=c(0,1))
    }else if(dir == ">="){
      pred = factor((x>=cutoffs[i])*1,levels=c(0,1))
    }

    cont = table(pred,y)
    TP = cont[2,2]
    FP = cont[2,1]
    TN = cont[1,1]
    FN = cont[1,2]
    sen = TP/(TP+FN)
    spe = TN/(TN+FP)
    ppv = TP/(TP+FP)
    npv = TN/(TN+FN)
    acc = (TP+TN)/(TP+FP+TN+FN)
    pe = mean(as.numeric(as.character(pred)))*mean(as.numeric(as.character(y)))+
      (1-mean(as.numeric(as.character(pred))))*(1-mean(as.numeric(as.character(y))))
    kap = (acc-pe)/(1-pe)


    sen.lo = scoreci(x=TP, n=TP+FN, conf.level = 0.95)$conf.int[1]
    sen.up = scoreci(x=TP, n=TP+FN, conf.level = 0.95)$conf.int[2]

    spe.lo = scoreci(x=TN, n=TN+FP, conf.level = 0.95)$conf.int[1]
    spe.up = scoreci(x=TN, n=TN+FP, conf.level = 0.95)$conf.int[2]

    ppv.lo = scoreci(x=TP, n=TP+FP, conf.level = 0.95)$conf.int[1]
    ppv.up = scoreci(x=TP, n=TP+FP, conf.level = 0.95)$conf.int[2]

    npv.lo = scoreci(x=TN, n=TN+FN, conf.level = 0.95)$conf.int[1]
    npv.up = scoreci(x=TN, n=TN+FN, conf.level = 0.95)$conf.int[2]

    acc.lo = scoreci(x=TP+TN, n=TP+FP+TN+FN, conf.level = 0.95)$conf.int[1]
    acc.up = scoreci(x=TP+TN, n=TP+FP+TN+FN, conf.level = 0.95)$conf.int[2]

    fish.p = stats::fisher.test(x=pred, y=y, or=1, alternative="two.sided")$p.value

    stat = rbind(stat, data.frame(Response = yvar.display, Predictor = xvar.display,
                                  Direction = dir,Cutoff = cutoffs[i], Sensitivity = sen, Specificity = spe,
                                  PPV = ppv, NPV = npv, Accuracy = acc, Kappa=kap, Fisher.pvalue = fish.p,
                                  Sensitivity.Lo = sen.lo, Sensitivity.Up = sen.up,
                                  Specificity.Lo = spe.lo, Specificity.Up = spe.up,
                                  PPV.Lo = ppv.lo, PPV.Up = ppv.up, NPV.Lo = npv.lo, NPV.Up = npv.up,
                                  Accuracy.Lo = acc.lo, Accuracy.Up = acc.up))

  }

  stat.display = stat
  temp = stat.display[,c("Sensitivity", "Specificity", "PPV", "NPV", "Accuracy", "Kappa", "Sensitivity.Lo", "Sensitivity.Up",
                         "Specificity.Lo", "Specificity.Up", "PPV.Lo", "PPV.Up", "NPV.Lo", "NPV.Up",
                         "Accuracy.Lo", "Accuracy.Up")]
  stat.display[,c("Sensitivity", "Specificity", "PPV", "NPV", "Accuracy", "Kappa","Sensitivity.Lo", "Sensitivity.Up",
                  "Specificity.Lo", "Specificity.Up", "PPV.Lo", "PPV.Up", "NPV.Lo", "NPV.Up",
                  "Accuracy.Lo", "Accuracy.Up")] = round(temp, digits=2)
  stat.display$Fisher.pvalue = round(stat.display$Fisher.pvalue, digits=4)

  stat.display$Sensitivity.CI = paste(stat.display$Sensitivity, " (", stat.display$Sensitivity.Lo, " , ", stat.display$Sensitivity.Up,")", sep="")
  stat.display$Specificity.CI = paste(stat.display$Specificity, " (", stat.display$Specificity.Lo, " , ", stat.display$Specificity.Up,")", sep="")
  stat.display$PPV.CI = paste(stat.display$PPV, " (", stat.display$PPV.Lo, " , ", stat.display$PPV.Up,")", sep="")
  stat.display$NPV.CI = paste(stat.display$NPV, " (", stat.display$NPV.Lo, " , ", stat.display$NPV.Up,")", sep="")
  stat.display$Accuracy.CI = paste(stat.display$Accuracy, " (", stat.display$Accuracy.Lo, " , ", stat.display$Accuracy.Up,")", sep="")
  stat.display = stat.display[,c("Response","Predictor","Direction", "Cutoff", "Sensitivity.CI",
                                 "Specificity.CI", "PPV.CI", "NPV.CI", "Accuracy.CI", "Kappa", "Fisher.pvalue")]

  if(method=="Fisher"){
    stat.sel = stat[which.min(stat$Fisher.pvalue), ]
    stat.sel.display = stat.display[which.min(stat$Fisher.pvalue),]
  }else if(method=="Youden"){
    stat.sel = stat[which.max(stat$Sensitivity+stat$Specificity), ]
    stat.sel.display = stat.display[which.max(stat$Sensitivity+stat$Specificity),]
  }else if(method=="Conc.Prob"){
    stat.sel = stat[which.max(stat$Sensitivity*stat$Specificity), ]
    stat.sel.display = stat.display[which.max(stat$Sensitivity*stat$Specificity),]
  }else if(method=="Accuracy"){
    stat.sel = stat[which.max(stat$Accuracy), ]
    stat.sel.display = stat.display[which.max(stat$Accuracy),]

  }else if(method=="Kappa"){
    stat.sel = stat[which.max(stat$Kappa), ]
    stat.sel.display = stat.display[which.max(stat$Kappa),]
  }


  row.names(stat.sel) = NULL
  row.names(stat.sel.display) = NULL
  cut.sel = stat.sel$Cutoff

  if(dir == "<"){
    pred = factor((x<cut.sel)*1,levels=c(0,1))
    dir.else = ">="
  }else if(dir == "<="){
    pred = factor((x<=cut.sel)*1,levels=c(0,1))
    dir.else = ">"
  }else if(dir == ">"){
    pred = factor((x>cut.sel)*1,levels=c(0,1))
    dir.else="<="
  }else if(dir == ">="){
    pred = factor((x>=cut.sel)*1,levels=c(0,1))
    dir.else="<"
  }

  cont = as.matrix(table(pred,y))
  colnames_cont = c(paste(yvar.display,"=", 0), paste(yvar.display,"=", 1))
  rownames_cont = c(paste(xvar.display,dir.else,cut.sel), paste(xvar.display,dir,cut.sel))
  temp.table = matrix(paste("(",round(cont/rowSums(cont), digits=3)*100, "%)",sep=""), nrow=dim(cont)[1])
  cont = matrix(paste(cont, temp.table), nrow=dim(cont)[1])
  rownames(cont) = rownames_cont
  colnames(cont) = colnames_cont

  #plot
  plot.sen = cbind(Measure="Sensitivity",stat[,c("Direction", "Cutoff", "Sensitivity", "Sensitivity.Lo", "Sensitivity.Up")])
  plot.spe = cbind(Measure="Specificity",stat[,c("Direction", "Cutoff", "Specificity", "Specificity.Lo", "Specificity.Up")])
  plot.ppv = cbind(Measure="PPV",stat[,c("Direction","Cutoff","PPV","PPV.Lo", "PPV.Up")])
  plot.npv = cbind(Measure="NPV",stat[,c("Direction","Cutoff","NPV","NPV.Lo", "NPV.Up")])
  plot.acc = cbind(Measure="Accuracy",stat[,c("Direction","Cutoff","Accuracy","Accuracy.Lo", "Accuracy.Up")])

  names(plot.sen) = c("Measure", "Direction", "Cutoff", "Value", "Lower", "Upper")
  names(plot.spe) = c("Measure", "Direction", "Cutoff", "Value", "Lower", "Upper")
  names(plot.ppv) = c("Measure", "Direction", "Cutoff", "Value", "Lower", "Upper")
  names(plot.npv) = c("Measure", "Direction", "Cutoff", "Value", "Lower", "Upper")
  names(plot.acc) = c("Measure", "Direction", "Cutoff", "Value", "Lower", "Upper")

  plot.data = rbind(plot.sen, plot.spe, plot.ppv, plot.npv, plot.acc)
  plot.data$Cutoff = factor(plot.data$Cutoff, levels=sort(unique(plot.data$Cutoff)))

  if(vert.x){
    fig = ggplot(plot.data, aes(x = Cutoff, y = Value, group=Measure, colour = Measure))+
      geom_errorbar(aes(ymin=Lower, ymax=Upper), width=0.3, size=0.8,position=position_dodge(0.2))+
      geom_line(size=0.8, position=position_dodge(0.2))+
      geom_point(size=1.8, position=position_dodge(0.2))+
      xlab(paste(xvar.display,dir,"Cutoff"))+
      ggtitle(paste(yvar.display, "~", xvar.display))+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15),
            axis.text.x = element_text(angle = 90, hjust = 1, size=13))
  }else{
    fig = ggplot(plot.data, aes(x = Cutoff, y = Value, group=Measure, colour = Measure))+
      geom_errorbar(aes(ymin=Lower, ymax=Upper), width=0.3, size=0.8,position=position_dodge(0.2))+
      geom_line(size=0.8, position=position_dodge(0.2))+
      geom_point(size=1.8, position=position_dodge(0.2))+
      xlab(paste(xvar.display,dir,"Cutoff"))+
      ggtitle(paste(yvar.display, "~", xvar.display))+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))

  }

  list(stat=stat, stat.display=stat.display, stat.sel=stat.sel, stat.sel.display=stat.sel.display, cut.sel=cut.sel, confusion = cont, fig=fig)
}

#' Fixed Cutoff Analysis for Individual Biomarker Associated with Continuous Outcome
#'
#' This function conducts fixed cutoff analysis for individual biomarker associated with continuous outcome variables.
#'
#' @param yvar Continuous response variable name.
#' @param xvar Biomarker name.
#' @param dir Cutoff direction for the desired subgroup. Options are ">", ">=", "<", or "<=".
#' @param cutoffs A vector of candidate cutoffs.
#' @param data The dataset containing the variables.
#' @param method Method for cutoff selection. Currently only supports "t.test".
#'        - "t.test": Minimizes the t-test p-value.
#' @param yvar.display Display name of the response variable.
#' @param xvar.display Display name of the predictor variable.
#' @param vert.x Whether to display the cutoff in a 90-degree angle when plotting (saves space).
#' @return A list containing statistical summaries, selected cutoff statistics, selected cutoff value, group statistics,
#'         and a ggplot object for visualization.
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   outcome = rnorm(100, mean = 10, sd = 5),
#'   biomarker = rnorm(100, mean = 0, sd = 1)
#' )
#'
#' # Perform fixed cutoff analysis using the "t.test" method with '>' direction
#' fixcut_con(
#'   yvar = "outcome",
#'   xvar = "biomarker",
#'   dir = ">",
#'   cutoffs = seq(-2, 2, by = 0.5),
#'   data = data,
#'   method = "t.test",
#'   yvar.display = "Continuous Outcome",
#'   xvar.display = "Biomarker Level",
#'   vert.x = TRUE
#' )
#'
#' # Perform fixed cutoff analysis with '<=' direction
#' fixcut_con(
#'   yvar = "outcome",
#'   xvar = "biomarker",
#'   dir = "<=",
#'   cutoffs = c(-1, 0, 1),
#'   data = data,
#'   method = "t.test",
#'   yvar.display = "Continuous Outcome",
#'   xvar.display = "Biomarker Level",
#'   vert.x = FALSE
#' )
fixcut_con <- function(yvar, xvar, dir, cutoffs, data, method="t.test", yvar.display=yvar, xvar.display=xvar, vert.x=FALSE) {
  Cutoff <- NULL
  Mean <- NULL
  Group <- NULL
  Mean.Lo <- NULL
  Mean.Up <- NULL

  data <- data[, c(xvar, yvar)]
  data <- data[stats::complete.cases(data),]
  row.names(data) <- NULL
  if ("Group" %in% names(data)) stop("xvar and yvar should not have the name of Group.")
  if (!dir %in% c(">", "<", ">=", "<=")) stop("dir should be >, <, >=, or <=")
  cutoffs <- sort(unique(cutoffs))
  stat <- NULL
  for (i in seq_along(cutoffs)) {
    if (dir == ">") {
      data$Group <- (data[, xvar] > cutoffs[i]) * 1
    } else if (dir == ">=") {
      data$Group <- (data[, xvar] >= cutoffs[i]) * 1
    } else if (dir == "<") {
      data$Group <- (data[, xvar] < cutoffs[i]) * 1
    } else if (dir == "<=") {
      data$Group <- (data[, xvar] <= cutoffs[i]) * 1
    }

    # Check for empty groups
    if (nrow(data) == 0) {
      next
    }

    fml <- stats::as.formula(paste(yvar, "~ Group"))
    ttest <- try(stats::t.test(fml, data = data, alternative = "two.sided", mu = 0, var.equal = FALSE, conf.level = 0.95), silent = TRUE)

    if (!inherits(ttest, "try-error")) {
      pval.t <- ttest$p.value
      mean.diff.lo <- ttest$conf.int[1]
      mean.diff.up <- ttest$conf.int[2]
    } else {
      pval.t <- NA
      mean.diff.lo <- NA
      mean.diff.up <- NA
    }

    group0 <- data[data$Group == 0, ]
    group1 <- data[data$Group == 1, ]

    group0.N <- nrow(group0)
    group1.N <- nrow(group1)

    # Print group sizes for debugging
    #print(paste("Cutoff:", cutoffs[i], "Group0.N:", group0.N, "Group1.N:", group1.N))

    if (group0.N == 0 || group1.N == 0) {
      next
    }

    group0.t <- try(stats::t.test(group0[, yvar], alternative = "two.sided", mu = 0, conf.level = 0.95), silent = TRUE)
    group1.t <- try(stats::t.test(group1[, yvar], alternative = "two.sided", mu = 0, conf.level = 0.95), silent = TRUE)

    if (!inherits(group0.t, "try-error")) {
      group0.mean <- unname(group0.t$estimate)
      group0.lower <- group0.t$conf.int[1]
      group0.upper <- group0.t$conf.int[2]
      group0.median <- stats::median(group0[, yvar])
    } else {
      group0.mean <- mean(group0[, yvar], na.rm = TRUE)
      group0.lower <- NA
      group0.upper <- NA
      group0.median <- stats::median(group0[, yvar], na.rm = TRUE)
    }

    if (!inherits(group1.t, "try-error")) {
      group1.mean <- unname(group1.t$estimate)
      group1.lower <- group1.t$conf.int[1]
      group1.upper <- group1.t$conf.int[2]
      group1.median <- stats::median(group1[, yvar])
    } else {
      group1.mean <- mean(group1[, yvar], na.rm = TRUE)
      group1.lower <- NA
      group1.upper <- NA
      group1.median <- stats::median(group1[, yvar], na.rm = TRUE)
    }

    mean.diff <- group0.mean - group1.mean
    median.diff <- group0.median - group1.median

    stat <- rbind(stat, data.frame(Response = yvar.display, Predictor = xvar.display,
                                   Direction = dir, Cutoff = cutoffs[i], Mean.Diff = mean.diff,
                                   Mean.Diff.Lo = mean.diff.lo, Mean.Diff.Up = mean.diff.up,
                                   ttest.pval = pval.t, Median.Diff = median.diff,
                                   Group0.N = group0.N, Group0.Mean = group0.mean,
                                   Group0.Mean.Lo = group0.lower, Group0.Mean.Up = group0.upper,
                                   Group0.Median = group0.median,
                                   Group1.N = group1.N, Group1.Mean = group1.mean,
                                   Group1.Mean.Lo = group1.lower, Group1.Mean.Up = group1.upper,
                                   Group1.Median = group1.median))
  }

  if (is.null(stat) || nrow(stat) == 0) {
    stop("No valid data available after processing cutoffs.")
  }

  stat.display <- stat
  stat.display <- stat.display[, c("Response", "Predictor", "Direction", "Cutoff", "Group0.N", "Group1.N",
                                   "Median.Diff", "Mean.Diff", "Mean.Diff.Lo", "Mean.Diff.Up", "ttest.pval")]
  temp <- stat.display[, c("Median.Diff", "Mean.Diff", "Mean.Diff.Lo", "Mean.Diff.Up")]
  stat.display[, c("Median.Diff", "Mean.Diff", "Mean.Diff.Lo", "Mean.Diff.Up")] <- round(temp, digits = 2)
  stat.display$ttest.pval <- round(stat.display$ttest.pval, digits = 4)
  stat.display$Mean.Diff.CI <- paste(stat.display$Mean.Diff, " (", stat.display$Mean.Diff.Lo, " , ",
                                     stat.display$Mean.Diff.Up, ")", sep = "")
  stat.display <- stat.display[, c("Response", "Predictor", "Direction", "Cutoff", "Group0.N", "Group1.N",
                                   "Median.Diff", "Mean.Diff.CI", "ttest.pval")]

  if (method == "t.test") {
    stat.sel <- stat[which.min(stat$ttest.pval), ]
    stat.sel.display <- stat.display[which.min(stat$ttest.pval), ]
  }

  cut.sel <- stat.sel$Cutoff

  row.names(stat.sel) <- NULL
  row.names(stat.sel.display) <- NULL
  row.names(stat.display) <- NULL

  if (dir == "<") {
    dir.else <- ">="
  } else if (dir == "<=") {
    dir.else <- ">"
  } else if (dir == ">") {
    dir.else <- "<="
  } else if (dir == ">=") {
    dir.else <- "<"
  }

  group0.res <- stat.sel[, c("Response", "Group0.N", "Group0.Mean", "Group0.Mean.Lo", "Group0.Mean.Up", "Group0.Median")]
  group1.res <- stat.sel[, c("Response", "Group1.N", "Group1.Mean", "Group1.Mean.Lo", "Group1.Mean.Up", "Group1.Median")]

  colnames(group0.res) <- c("Response", "N", "Mean", "Mean.Lo", "Mean.Up", "Median")
  colnames(group1.res) <- c("Response", "N", "Mean", "Mean.Lo", "Mean.Up", "Median")
  group.res <- rbind(group0.res, group1.res)


  row.names(group.res) <- c(paste(xvar.display, dir.else, cut.sel), paste(xvar.display, dir, cut.sel))



  group.res.display <- group.res
  group.res.display[, c("Mean", "Mean.Lo", "Mean.Up", "Median")] <- round(group.res.display[, c("Mean", "Mean.Lo", "Mean.Up", "Median")], digits = 2)
  group.res.display$Mean.CI <- paste(group.res.display$Mean, " (", group.res.display$Mean.Lo, " , ", group.res.display$Mean.Up, ")")
  group.res.display <- group.res.display[, c("Response", "N", "Mean.CI", "Median")]

  ## plot ##
  data.plot0 <- stat[, c("Cutoff", "Group0.Mean", "Group0.Mean.Lo", "Group0.Mean.Up")]
  data.plot0$Group <- paste(dir.else, "Cutoff")
  row.names(data.plot0) <- NULL
  colnames(data.plot0) <- c("Cutoff", "Mean", "Mean.Lo", "Mean.Up", "Group")
  data.plot1 <- stat[, c("Cutoff", "Group1.Mean", "Group1.Mean.Lo", "Group1.Mean.Up")]
  data.plot1$Group <- paste(dir, "Cutoff")
  row.names(data.plot1) <- NULL
  colnames(data.plot1) <- c("Cutoff", "Mean", "Mean.Lo", "Mean.Up", "Group")

  data.plot <- rbind(data.plot0, data.plot1)

  if (nrow(data.plot) == 0) {
    stop("No data to plot.")
  }

  data.plot$Cutoff <- factor(data.plot$Cutoff, levels = sort(unique(data.plot$Cutoff)))

  fig <- ggplot(data.plot, aes(x = Cutoff, y = Mean, group = Group, colour = Group)) +
    geom_errorbar(aes(ymin = Mean.Lo, ymax = Mean.Up), width = 0.3, size = 0.8, position = position_dodge(0.2)) +
    geom_line(size = 0.8, position = position_dodge(0.2)) +
    geom_point(size = 1.8, position = position_dodge(0.2)) +
    xlab(paste("Cutoff of", xvar.display)) +
    ggtitle(paste(yvar.display, "~", xvar.display)) +
    theme_bw(base_size = 15) +
    theme(text = element_text(size = 15), axis.text = element_text(size = 15), legend.text = element_text(size = 15))

  if (vert.x) {
    fig <- fig + theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 13))
  }

  list(stat = stat, stat.display = stat.display, stat.sel = stat.sel, stat.sel.display = stat.sel.display, cut.sel = cut.sel,
       group.res = group.res, group.res.display = group.res.display, fig = fig)
}



#' Fixed Cutoff Analysis for Individual Biomarker Associated with Survival Outcome
#'
#' This function conducts fixed cutoff analysis for Individual Biomarker Associated with survival outcome variables.
#'
#' @param yvar Survival response variable name.
#' @param censorvar Censoring variable. 0 indicates censored, 1 indicates an event.
#' @param xvar Biomarker name.
#' @param dir Cutoff direction for the desired subgroup.
#'        Options are ">", ">=", "<", or "<=".
#' @param cutoffs A vector of candidate cutoffs.
#' @param data The dataset containing the variables.
#' @param method Method for cutoff selection.
#'        Currently only supports "logrank".
#'        - "logrank": Minimizes the logrank test p-value.
#' @param yvar.display Display name of the response variable.
#' @param xvar.display Display name of the predictor variable.
#' @param vert.x Whether to display the cutoff in a 90-degree angle when plotting (saves space).
#' @return A list containing statistical summaries, selected cutoff statistics, selected cutoff value, group statistics,
#'         and a ggplot object for visualization.
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   time = rexp(100, rate = 0.1),  # survival time
#'   status = sample(c(0, 1), 100, replace = TRUE),  # censoring status
#'   biomarker = rnorm(100, mean = 0, sd = 1)  # biomarker levels
#' )
#'
#' fixcut_sur(
#'   yvar = "time",
#'   censorvar = "status",
#'   xvar = "biomarker",
#'   dir = "<=",
#'   cutoffs = c(-1, 0, 1),
#'   data = data,
#'   method = "logrank",
#'   yvar.display = "Survival Time",
#'   xvar.display = "Biomarker Level",
#'   vert.x = FALSE
#' )
fixcut_sur = function(yvar, censorvar, xvar, dir, cutoffs, data, method="logrank", yvar.display=yvar, xvar.display=xvar, vert.x=FALSE){
  Cutoff=NULL
  HR.Lo=NULL
  HR.Up=NULL

  if(is.null(censorvar)) stop("Censoring variable missing!")
  data = data[,c(xvar, yvar, censorvar)]
  data = data[stats::complete.cases(data),]
  row.names(data)=NULL
  if("Group" %in% names(data)) stop("xvar, censorvar and yvar should not have the name of Group.")
  if(!dir%in%c(">", "<", ">=", "<=")) stop("dir should be >, <, >=, or <=")
  if(dir==">"){
    dir.else = "<="
  }else if(dir==">="){
    dir.else = "<"
  }else if(dir=="<"){
    dir.else = ">="
  }else if(dir=="<="){
    dir.else = ">"
  }


  cutoffs = sort(unique(cutoffs))

  stat = NULL
  for(i in 1:length(cutoffs)){
    if(dir==">"){
      data$Group = (data[,xvar]>cutoffs[i])*1
    }else if(dir==">="){
      data$Group = (data[,xvar]>=cutoffs[i])*1
    }else if(dir=="<"){
      data$Group = (data[,xvar]<cutoffs[i])*1
    }else if(dir=="<="){
      data$Group = (data[,xvar]<=cutoffs[i])*1
    }

    fml = stats::as.formula(paste("Surv(", yvar, ",", censorvar,") ~ Group"))
    res.cox = try(summary(coxph(fml, data=data)), silent=T)
    if(!inherits(res.cox,"try-error")){
      HR = res.cox$conf.int["Group", "exp(coef)"]
      HR.lower = res.cox$conf.int["Group", "lower .95"]
      HR.upper = res.cox$conf.int["Group", "upper .95"]
    }else{
      HR = as.numeric(NA)
      HR.lower = as.numeric(NA)
      HR.upper = as.numeric(NA)
    }

    data$Group = c(paste(xvar.display, dir.else, cutoffs[i]), paste(xvar.display, dir, cutoffs[i]))[data$Group+1]
    data$Group = factor(data$Group, levels=c(paste(xvar.display, dir.else, cutoffs[i]), paste(xvar.display, dir, cutoffs[i])))
    #logrank
    sdf = try(survdiff(fml, data=data, rho = 0),silent=T)
    if(!inherits(sdf,"try-error")){
      pval.logrank = 1 - stats::pchisq(sdf$chisq, length(sdf$n)-1)
    }else{
      pval.logrank = as.numeric(NA)
    }

    sfit = survfit(fml, data = data, conf.type="log-log")
    sfit.res = as.data.frame(summary(sfit, rmean="common")$table[,c("n.start","events", "rmean", "se(rmean)",
                                                                    "median","0.95LCL","0.95UCL")])
    sfit.res$rmean.lower = sfit.res[,"rmean"]-1.96*sfit.res[,"se(rmean)"]
    sfit.res$rmean.upper = sfit.res[,"rmean"]+1.96*sfit.res[,"se(rmean)"]
    sfit.res = sfit.res[, c("n.start","events", "rmean","rmean.lower", "rmean.upper",
                            "median","0.95LCL","0.95UCL")]
    names(sfit.res) = c("N", "Events", "Mean.Surv", "Mean.Surv.Lo", "Mean.Surv.Up", "Med.Surv", "Med.Surv.Lo", "Med.Surv.Up")

    sfit.group0.res = sfit.res[paste("Group=",paste(xvar.display, dir.else, cutoffs[i]),sep=""),]
    sfit.group1.res = sfit.res[paste("Group=",paste(xvar.display, dir, cutoffs[i]),sep=""),]
    names(sfit.group0.res) = paste("Group0.", names(sfit.res),sep="")
    names(sfit.group1.res) = paste("Group1.", names(sfit.res),sep="")

    stat.i = data.frame(Response = yvar.display, Predictor = xvar.display,
                        Direction = dir,Cutoff = cutoffs[i], HR =HR, HR.Lo=HR.lower, HR.Up=HR.upper, logrank.pvalue = pval.logrank)
    stat.i = cbind(stat.i, sfit.group0.res, sfit.group1.res)
    stat = rbind(stat, stat.i)

  }

  stat.display = stat
  temp.name = c("N", "Events", "Mean.Surv", "Mean.Surv.Lo", "Mean.Surv.Up", "Med.Surv", "Med.Surv.Lo", "Med.Surv.Up")
  temp = stat.display[,c("HR","HR.Lo","HR.Up",paste("Group0.", temp.name, sep=""), paste("Group1.", temp.name, sep=""))]
  stat.display[,c("HR","HR.Lo","HR.Up",paste("Group0.", temp.name, sep=""), paste("Group1.", temp.name, sep=""))] = round(temp, digits=2)
  stat.display$logrank.pvalue = round(stat.display$logrank.pvalue, digits=4)

  stat.display$HR.CI = paste(stat.display$HR, " (", stat.display$HR.Lo, " , ", stat.display$HR.Up, ")", sep="")
  stat.display$Group0.Mean.Surv.CI = paste(stat.display$Group0.Mean.Surv, " (", stat.display$Group0.Mean.Surv.Lo, " , ",
                                           stat.display$Group0.Mean.Surv.Up, ")", sep="")
  stat.display$Group0.Med.Surv.CI = paste(stat.display$Group0.Med.Surv, " (", stat.display$Group0.Med.Surv.Lo, " , ",
                                          stat.display$Group0.Med.Surv.Up, ")", sep="")
  stat.display$Group1.Mean.Surv.CI = paste(stat.display$Group1.Mean.Surv, " (", stat.display$Group1.Mean.Surv.Lo, " , ",
                                           stat.display$Group1.Mean.Surv.Up, ")", sep="")
  stat.display$Group1.Med.Surv.CI = paste(stat.display$Group1.Med.Surv, " (", stat.display$Group1.Med.Surv.Lo, " , ",
                                          stat.display$Group1.Med.Surv.Up, ")", sep="")
  stat.display = stat.display[,c("Response", "Predictor", "Direction", "Cutoff", "HR.CI",
                                 "Group0.N", "Group0.Events","Group0.Mean.Surv.CI","Group0.Med.Surv.CI",
                                 "Group1.N", "Group1.Events","Group1.Mean.Surv.CI","Group1.Med.Surv.CI",
                                 "logrank.pvalue")]

  if(method=="logrank"){
    stat.sel = stat[which.min(stat$logrank.pvalue), ]
    stat.sel.display = stat.display[which.min(stat$logrank.pval),]
  }

  row.names(stat.sel) = NULL
  row.names(stat.sel.display) = NULL
  cut.sel = stat.sel$Cutoff

  group0.res = stat.sel[,c("Response", paste("Group0.", temp.name, sep=""))]
  group1.res = stat.sel[,c("Response", paste("Group1.", temp.name, sep=""))]
  names(group0.res) = c("Response",temp.name)
  names(group1.res) = c("Response",temp.name)
  group.res = rbind(group0.res, group1.res)
  row.names(group.res) = c(paste(xvar.display, dir.else, cut.sel),paste(xvar.display, dir, cut.sel))


  group0.res.display = stat.sel.display[,c("Response","Group0.N","Group0.Events","Group0.Mean.Surv.CI","Group0.Med.Surv.CI")]
  group1.res.display = stat.sel.display[,c("Response","Group1.N","Group1.Events","Group1.Mean.Surv.CI","Group1.Med.Surv.CI")]
  names(group0.res.display) = c("Response","N","Events","Mean.Surv.CI","Med.Surv.CI")
  names(group1.res.display) = c("Response","N","Events","Mean.Surv.CI","Med.Surv.CI")
  group.res.display = rbind(group0.res.display, group1.res.display)
  row.names(group.res.display) = c(paste(xvar.display, dir.else, cut.sel),paste(xvar.display, dir, cut.sel))

  ## plot ##
  data.plot = stat[,c("Cutoff","HR","HR.Lo","HR.Up")]
  data.plot$Cutoff = factor(data.plot$Cutoff, levels=sort(unique(data.plot$Cutoff)))

  if(vert.x){
    fig = ggplot(data.plot, aes(x = Cutoff, y = HR))+
      geom_errorbar(aes(ymin=HR.Lo, ymax=HR.Up), width=0.3, size=0.8)+
      geom_line(size=0.8)+
      geom_point(size=1.8)+
      xlab(paste("Cutoff of", xvar.display))+
      ggtitle(paste(yvar.display, "~", xvar.display))+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15),
            axis.text.x = element_text(angle = 90, hjust = 1, size=13))
  }else{
    fig = ggplot(data.plot, aes(x = Cutoff, y = HR))+
      geom_errorbar(aes(ymin=HR.Lo, ymax=HR.Up), width=0.3, size=0.8)+
      geom_line(size=0.8)+
      geom_point(size=1.8)+
      xlab(paste("Cutoff of", xvar.display))+
      ggtitle(paste(yvar.display, "~", xvar.display))+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))

  }

  list(stat=stat, stat.display=stat.display, stat.sel=stat.sel, stat.sel.display=stat.sel.display, cut.sel=cut.sel,
       group.res = group.res, group.res.display=group.res.display, fig=fig)
}


#' Cutoff Performance Evaluation
#'
#' This function evaluates the performance of a predictive model at a selected cutoff point.
#'
#' @param yvar Response variable name.
#' @param censorvar Censoring variable name (0-censored, 1-event).
#' @param xvar Biomarker name.
#' @param cutoff Selected cutoff value.
#' @param dir Direction for desired subgroup (">", ">=", "<", "<=").
#' @param xvars.adj Other covariates to adjust when evaluating the performance.
#' @param data Data frame containing the variables.
#' @param type Type of analysis: "c" for continuous, "s" for survival, and "b" for binary.
#' @param yvar.display Display name of response variable.
#' @param xvar.display Display name of biomarker variable.
#' @import survival
#' @import survminer
#' @return A list containing various performance metrics and optionally, plots.
#' @import survminer
#' @importFrom stats confint
#' @importFrom stats glm
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   survival_time = rexp(100, rate = 0.1),  # survival time
#'   status = sample(c(0, 1), 100, replace = TRUE),  # censoring status
#'   biomarker = rnorm(100, mean = 0, sd = 1),  # biomarker levels
#'   covariate1 = rnorm(100, mean = 50, sd = 10)  # an additional covariate
#' )
#' # Perform cutoff performance evaluation for continuous outcome
#' data$continuous_outcome <- rnorm(100, mean = 10, sd = 5)
#' cut_perf(
#'   yvar = "continuous_outcome",
#'   xvar = "biomarker",
#'   cutoff = 0.5,
#'   dir = ">=",
#'   data = data,
#'   type = "c",
#'   yvar.display = "Continuous Outcome",
#'   xvar.display = "Biomarker Level"
#' )
#'
#' # Perform cutoff performance evaluation for binary outcome
#' data$binary_outcome <- sample(c(0, 1), 100, replace = TRUE)
#' cut_perf(
#'   yvar = "binary_outcome",
#'   xvar = "biomarker",
#'   cutoff = 0,
#'   dir = "<=",
#'   data = data,
#'   type = "b",
#'   yvar.display = "Binary Outcome",
#'   xvar.display = "Biomarker Level"
#' )

cut_perf = function(yvar, censorvar=NULL, xvar, cutoff, dir, xvars.adj=NULL, data, type, yvar.display=yvar, xvar.display=xvar){
  Group=NULL

  if(is.null(xvars.adj)){
    data = data[, c(yvar,censorvar,xvar)]
    data = data[stats::complete.cases(data),]
    if(!dir%in%c(">", "<", ">=", "<=")) stop("dir should be >, <, >=, or <=")
    if("Group" %in% names(data)) stop("Group cannot be one of the column names of data.")

    if(dir==">"){
      data$Group = (data[,xvar]>cutoff)*1
      dir.else = "<="
    }else if(dir==">="){
      data$Group = (data[,xvar]>=cutoff)*1
      dir.else = "<"
    }else if(dir=="<"){
      data$Group = (data[,xvar]<cutoff)*1
      dir.else = ">="
    }else if(dir=="<="){
      data$Group = (data[,xvar]<=cutoff)*1
      dir.else = ">"
    }

    if(type=="s"){
      if(is.null(censorvar)) stop("Censoring variable missing!")

      #coxph
      fml = stats::as.formula(paste("Surv(", yvar, ",", censorvar,") ~ Group"))
      res.cox = try(summary(coxph(fml, data=data)), silent=T)
      if(!inherits(res.cox,"try-error")){
        HR = res.cox$conf.int["Group", "exp(coef)"]
        HR.lower = res.cox$conf.int["Group", "lower .95"]
        HR.upper = res.cox$conf.int["Group", "upper .95"]
      }else{
        HR = as.numeric(NA)
        HR.lower = as.numeric(NA)
        HR.upper = as.numeric(NA)
      }


      data$Group = c(paste(xvar.display, dir.else, cutoff), paste(xvar.display, dir, cutoff))[data$Group+1]
      data$Group = factor(data$Group, levels=c(paste(xvar.display, dir.else, cutoff), paste(xvar.display, dir, cutoff)))
      #logrank
      sdf = try(survdiff(fml, data=data, rho = 0),silent=T)
      if(!inherits(sdf,"try-error")){
        pval.logrank = 1 - stats::pchisq(sdf$chisq, length(sdf$n)-1)
      }else{
        pval.logrank = as.numeric(NA)
      }

      comp.res = data.frame(HR=HR, HR.lower=HR.lower, HR.upper=HR.upper, logrank.pvalue = pval.logrank)
      comp.res.display = comp.res
      comp.res.display[,c("HR","HR.lower","HR.upper")] = round(comp.res.display[,c("HR","HR.lower","HR.upper")], digits=2)
      comp.res.display$logrank.pvalue = round(comp.res.display$logrank.pvalue, digits=4)

      comp.res.display$HR.CI = paste(comp.res.display$HR, " (", comp.res.display$HR.lower, " , ",
                                     comp.res.display$HR.upper, ")", sep="")
      comp.res.display = comp.res.display[,c("HR.CI", "logrank.pvalue")]
      header = data.frame(Response=yvar.display, Predictor=xvar.display, Direction=dir, Cutoff=cutoff)
      comp.res = cbind(header, comp.res)
      comp.res.display = cbind(header, comp.res.display)


      #median/mean survival time
      sfit = survfit(fml, data = data, conf.type="log-log")
      sfit.res = as.data.frame(summary(sfit, rmean="common")$table[,c("n.start","events", "rmean", "se(rmean)",
                                                                      "median","0.95LCL","0.95UCL")])
      sfit.res$rmean.lower = sfit.res[,"rmean"]-1.96*sfit.res[,"se(rmean)"]
      sfit.res$rmean.upper = sfit.res[,"rmean"]+1.96*sfit.res[,"se(rmean)"]
      sfit.res = sfit.res[, c("n.start","events", "rmean","rmean.lower", "rmean.upper",
                              "median","0.95LCL","0.95UCL")]
      names(sfit.res) = c("N", "Events", "Mean.Surv", "Mean.Surv.Lo", "Mean.Surv.Up", "Med.Surv", "Med.Surv.Lo", "Med.Surv.Up")


      sfit.res.display = sfit.res
      sfit.res.display = round(sfit.res.display, digits=2)
      sfit.res.display$Mean.Surv.CI = paste(sfit.res.display$Mean.Surv, " (", sfit.res.display$Mean.Surv.Lo, " , ",
                                            sfit.res.display$Mean.Surv.Up, ")", sep="")
      sfit.res.display$Med.Surv.CI = paste(sfit.res.display$Med.Surv, " (", sfit.res.display$Med.Surv.Lo, " , ",
                                           sfit.res.display$Med.Surv.Up, ")", sep="")
      sfit.res.display = sfit.res.display[,c("N", "Events", "Mean.Surv.CI", "Med.Surv.CI")]

      sfit.res = cbind(data.frame(Response=yvar.display), sfit.res)
      sfit.res.display = cbind(data.frame(Response=yvar.display), sfit.res.display)

      #plot
      fig = ggsurvplot(surv_fit(fml, data = data, conf.type="log-log"), data = data, risk.table = TRUE,
                       surv.median.line="none", pval=F, pval.method=F,  xlab=yvar.display, legend.title=xvar.display,
                       legend.labs = c(paste(dir.else, cutoff), paste(dir, cutoff)),
                       font.legend= 14, title=paste(yvar.display, "~", xvar.display))


      res = list(comp.res=comp.res, comp.res.display=comp.res.display, group.res=sfit.res, group.res.display=sfit.res.display, fig=fig)


    }else if(type=="c"){
      fml = stats::as.formula(paste(yvar,"~ Group"))
      wrstest = try(stats::wilcox.test(fml, data=data, alternative = "two.sided", mu=0, paired=FALSE), silent=T)
      ttest = try(stats::t.test(fml, data=data, alternative = "two.sided", mu=0, paired = FALSE, var.equal = FALSE,
                         conf.level = 0.95),silent=T)
      if(!inherits(ttest,"try-error")){
        pval.t = ttest$p.value
        mean.diff.lo = ttest$conf.int[1]
        mean.diff.up = ttest$conf.int[2]
      }else{
        pval.t = as.numeric(NA)
        mean.diff.lo = as.numeric(NA)
        mean.diff.up = as.numeric(NA)
      }

      if(!inherits(wrstest,"try-error")){
        pval.wrs = wrstest$p.value
      }else{
        pval.wrs = as.numeric(NA)
      }


      group0 = data[data$Group%in%0,]
      group1 = data[data$Group%in%1,]

      group0.N = dim(group0)[1]
      group1.N = dim(group1)[1]

      group0.t = try(stats::t.test(group0[,yvar], alternative = "two.sided", mu = 0, conf.level = 0.95), silent=T)
      group1.t = try(stats::t.test(group1[,yvar], alternative = "two.sided", mu = 0, conf.level = 0.95), silent=T)

      if(!inherits(group0.t,"try-error")){
        group0.mean = unname(group0.t$estimate)
        group0.lower = group0.t$conf.int[1]
        group0.upper = group0.t$conf.int[2]
        group0.median = stats::median(group0[,yvar])
      }else{
        group0.mean = mean(group0[,yvar])
        group0.lower = as.numeric(NA)
        group0.upper = as.numeric(NA)
        group0.median = stats::median(group0[,yvar])
      }
      group0.res = data.frame(Response=yvar.display, N=group0.N, Mean=group0.mean, Mean.Lo=group0.lower, Mean.Up=group0.upper, Median=group0.median)

      if(!inherits(group1.t,"try-error")){
        group1.mean = unname(group1.t$estimate)
        group1.lower = group1.t$conf.int[1]
        group1.upper = group1.t$conf.int[2]
        group1.median = stats::median(group1[,yvar])
      }else{
        group1.mean = mean(group1[,yvar])
        group1.lower = as.numeric(NA)
        group1.upper = as.numeric(NA)
        group1.median = stats::median(group1[,yvar])
      }
      group1.res = data.frame(Response=yvar.display, N=group1.N, Mean=group1.mean, Mean.Lo=group1.lower, Mean.Up=group1.upper, Median=group1.median)

      mean.diff = group0.mean - group1.mean
      median.diff = group0.median - group1.median

      comp.res = data.frame(Response=yvar.display, Predictor=xvar.display, Direction=dir, Cutoff=cutoff, Mean.Diff = mean.diff,
                            Mean.Diff.Lo = mean.diff.lo, Mean.Diff.Up = mean.diff.up, t.pvalue = pval.t, Median.Diff = median.diff,
                            WRS.pvalue = pval.wrs)
      comp.res.display = comp.res
      temp = comp.res.display[,c("Median.Diff", "Mean.Diff", "Mean.Diff.Lo", "Mean.Diff.Up")]
      comp.res.display[,c("Median.Diff", "Mean.Diff", "Mean.Diff.Lo", "Mean.Diff.Up")] = round(temp, digits = 2)
      comp.res.display[,c("t.pvalue","WRS.pvalue")] = round(comp.res.display[,c("t.pvalue","WRS.pvalue")], digits=4)
      comp.res.display$Mean.Diff.CI = paste(comp.res.display$Mean.Diff, " (", comp.res.display$Mean.Diff.Lo, " , ",
                                            comp.res.display$Mean.Diff.Up, ")", sep="")

      comp.res.display = comp.res.display[,c("Response", "Predictor", "Direction", "Cutoff",
                                             "Median.Diff", "Mean.Diff.CI","t.pvalue", "WRS.pvalue" )]

      group.res = rbind(group0.res, group1.res)
      rownames(group.res) = c(paste(xvar.display, dir.else, cutoff),paste(xvar.display, dir, cutoff))

      group.res.display = group.res
      group.res.display[,c("Mean", "Mean.Lo", "Mean.Up", "Median")] = round(group.res.display[,c("Mean", "Mean.Lo", "Mean.Up", "Median")], digits=2)
      group.res.display$Mean.CI = paste(group.res.display$Mean, " (", group.res.display$Mean.Lo, " , ", group.res.display$Mean.Up, ")")
      group.res.display = group.res.display[,c("Response","N", "Mean.CI", "Median")]

      #plot
      data.plot = data[,c(yvar,"Group")]
      data.plot$Group = c(paste(xvar.display, dir.else, cutoff), paste(xvar.display, dir, cutoff))[data.plot$Group+1]
      names(data.plot) = c("y", "Group")
      fig = ggplot(data.plot, aes(x=Group, y=y)) + geom_boxplot(outlier.shape=NA) +
        stat_summary(fun=mean, geom="point", shape=5, size=4) +
        geom_point(size=2.5, position=position_jitter(width=0.18,height=0))+
        ylab(yvar.display)+
        theme_bw(base_size=15)+
        theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))

      res = list(comp.res=comp.res, comp.res.display=comp.res.display, group.res=group.res, group.res.display=group.res.display, fig=fig)



    }else if(type=="b"){
      if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")

      x=data[,"Group"]
      y=data[,yvar]
      x=factor(x, levels=c(0,1))
      y=factor(y, levels=c(0,1))

      cont = table(x,y)
      TP = cont[2,2]
      FP = cont[2,1]
      TN = cont[1,1]
      FN = cont[1,2]
      sen = TP/(TP+FN)
      spe = TN/(TN+FP)
      ppv = TP/(TP+FP)
      npv = TN/(TN+FN)
      acc = (TP+TN)/(TP+FP+TN+FN)

      sen.lo = scoreci(x=TP, n=TP+FN, conf.level = 0.95)$conf.int[1]
      sen.up = scoreci(x=TP, n=TP+FN, conf.level = 0.95)$conf.int[2]

      spe.lo = scoreci(x=TN, n=TN+FP, conf.level = 0.95)$conf.int[1]
      spe.up = scoreci(x=TN, n=TN+FP, conf.level = 0.95)$conf.int[2]

      ppv.lo = scoreci(x=TP, n=TP+FP, conf.level = 0.95)$conf.int[1]
      ppv.up = scoreci(x=TP, n=TP+FP, conf.level = 0.95)$conf.int[2]

      npv.lo = scoreci(x=TN, n=TN+FN, conf.level = 0.95)$conf.int[1]
      npv.up = scoreci(x=TN, n=TN+FN, conf.level = 0.95)$conf.int[2]

      acc.lo = scoreci(x=TP+TN, n=TP+FP+TN+FN, conf.level = 0.95)$conf.int[1]
      acc.up = scoreci(x=TP+TN, n=TP+FP+TN+FN, conf.level = 0.95)$conf.int[2]

      fish.p = stats::fisher.test(x=x, y=y, or=1, alternative="two.sided")$p.value

      stat = data.frame(Response = yvar.display, Predictor = xvar.display,
                        Direction = dir,Cutoff = cutoff, N.Sel = TP+FP, N.unSel=TN+FN,
                        Sensitivity = sen, Specificity = spe,
                        PPV = ppv, NPV = npv, Accuracy = acc, Fisher.pvalue = fish.p,
                        Sensitivity.Lo = sen.lo, Sensitivity.Up = sen.up,
                        Specificity.Lo = spe.lo, Specificity.Up = spe.up,
                        PPV.Lo = ppv.lo, PPV.Up = ppv.up, NPV.Lo = npv.lo, NPV.Up = npv.up,
                        Accuracy.Lo = acc.lo, Accuracy.Up = acc.up)


      stat.display = stat
      temp = stat.display[,c("Sensitivity", "Specificity", "PPV", "NPV", "Accuracy", "Sensitivity.Lo", "Sensitivity.Up",
                             "Specificity.Lo", "Specificity.Up", "PPV.Lo", "PPV.Up", "NPV.Lo", "NPV.Up",
                             "Accuracy.Lo", "Accuracy.Up")]
      stat.display[,c("Sensitivity", "Specificity", "PPV", "NPV", "Accuracy", "Sensitivity.Lo", "Sensitivity.Up",
                      "Specificity.Lo", "Specificity.Up", "PPV.Lo", "PPV.Up", "NPV.Lo", "NPV.Up",
                      "Accuracy.Lo", "Accuracy.Up")] = round(temp, digits=2)
      stat.display$Fisher.pvalue = round(stat.display$Fisher.pvalue, digits=4)

      stat.display$Sensitivity.CI = paste(stat.display$Sensitivity, " (", stat.display$Sensitivity.Lo, " , ", stat.display$Sensitivity.Up,")", sep="")
      stat.display$Specificity.CI = paste(stat.display$Specificity, " (", stat.display$Specificity.Lo, " , ", stat.display$Specificity.Up,")", sep="")
      stat.display$PPV.CI = paste(stat.display$PPV, " (", stat.display$PPV.Lo, " , ", stat.display$PPV.Up,")", sep="")
      stat.display$NPV.CI = paste(stat.display$NPV, " (", stat.display$NPV.Lo, " , ", stat.display$NPV.Up,")", sep="")
      stat.display$Accuracy.CI = paste(stat.display$Accuracy, " (", stat.display$Accuracy.Lo, " , ", stat.display$Accuracy.Up,")", sep="")
      stat.display = stat.display[,c("Response","Predictor","Direction", "Cutoff", "Sensitivity.CI",
                                     "Specificity.CI", "PPV.CI", "NPV.CI", "Accuracy.CI", "Fisher.pvalue")]
      #rownames(stat) = NULL
      #rownames(stat.display) = NULL


      cont = as.matrix(cont)
      colnames_cont = c(paste(yvar.display,"=", 0), paste(yvar.display,"=", 1))
      rownames_cont = c(paste(xvar.display,dir.else,cutoff), paste(xvar.display,dir,cutoff))
      temp.table = matrix(paste("(",round(cont/rowSums(cont), digits=3)*100, "%)",sep=""), nrow=dim(cont)[1])
      cont = matrix(paste(cont, temp.table), nrow=dim(cont)[1])
      rownames(cont) = rownames_cont
      colnames(cont) = colnames_cont

      res = list(stat=stat, stat.display=stat.display, confusion=cont)


    }else{
      stop("type can only be c, b or s.")
    }

  }else{
    data = data[, c(yvar,censorvar,xvar, xvars.adj)]
    data = data[stats::complete.cases(data),]

    if(!dir%in%c(">", "<", ">=", "<=")) stop("dir should be >, <, >=, or <=")
    if("Group" %in% names(data)) stop("Group cannot be one of the column names of data.")

    if(dir==">"){
      data$Group = (data[,xvar]>cutoff)*1
      dir.else = "<="
    }else if(dir==">="){
      data$Group = (data[,xvar]>=cutoff)*1
      dir.else = "<"
    }else if(dir=="<"){
      data$Group = (data[,xvar]<cutoff)*1
      dir.else = ">="
    }else if(dir=="<="){
      data$Group = (data[,xvar]<=cutoff)*1
      dir.else = ">"
    }

    if(type=="s"){
      if(is.null(censorvar)) stop("Censoring variable missing!")

      #coxph
      xvars.adj.fml = paste(xvars.adj,collapse="+")
      fml = stats::as.formula(paste("Surv(", yvar, ",", censorvar,") ~ Group +", xvars.adj.fml))
      res.cox = try(summary(coxph(fml, data=data)), silent=T)
      if(!inherits(res.cox,"try-error")){
        HR = res.cox$conf.int["Group", "exp(coef)"]
        HR.lower = res.cox$conf.int["Group", "lower .95"]
        HR.upper = res.cox$conf.int["Group", "upper .95"]
        pval = res.cox$coefficients["Group", "Pr(>|z|)"]
      }else{
        HR = as.numeric(NA)
        HR.lower = as.numeric(NA)
        HR.upper = as.numeric(NA)
        pval = as.numeric(NA)
      }

      comp.res = data.frame(HR=HR, HR.lower=HR.lower, HR.upper=HR.upper, cox.pvalue = pval)
      comp.res.display = comp.res
      comp.res.display[,c("HR","HR.lower","HR.upper")] = round(comp.res.display[,c("HR","HR.lower","HR.upper")], digits=2)
      comp.res.display$cox.pvalue = round(comp.res.display$cox.pvalue, digits=4)
      comp.res.display$HR.CI = paste(comp.res.display$HR, " (", comp.res.display$HR.lower, " , ",
                                     comp.res.display$HR.upper, ")", sep="")
      comp.res.display = comp.res.display[,c("HR.CI", "cox.pvalue")]


      header = data.frame(Response=yvar.display, Predictor=xvar.display, Direction=dir, Cutoff=cutoff)
      comp.res = cbind(header, comp.res)
      comp.res.display = cbind(header, comp.res.display)

      res = list(comp.res=comp.res, comp.res.display=comp.res.display)


    }else if(type=="c"){
      xvars.adj.fml = paste(xvars.adj,collapse="+")
      fml = stats::as.formula(paste(yvar,"~ Group +",xvars.adj.fml))
      lm.fit = try(stats::lm(fml, data=data), silent=T)
      if(!inherits(lm.fit,"try-error")){
        mean.diff = summary(lm.fit)$coefficients["Group","Estimate"]
        mean.diff.lower = confint(lm.fit)["Group","2.5 %"]
        mean.diff.upper = confint(lm.fit)["Group","97.5 %"]
        pval = summary(lm.fit)$coefficients["Group","Pr(>|t|)"]
      }else{
        mean.diff = as.numeric(NA)
        mean.diff.lower = as.numeric(NA)
        mean.diff.upper = as.numeric(NA)
        pval = as.numeric(NA)
      }

      comp.res = data.frame(Mean.Diff = mean.diff, Mean.Diff.Lo = mean.diff.lower, Mean.Diff.Up=mean.diff.upper,
                            lm.pvalue = pval)
      comp.res.display = comp.res
      temp = comp.res.display[,c("Mean.Diff", "Mean.Diff.Lo", "Mean.Diff.Up")]
      comp.res.display[,c("Mean.Diff", "Mean.Diff.Lo", "Mean.Diff.Up")] = round(temp, digits = 2)
      comp.res.display$lm.pvalue = round(comp.res.display$lm.pvalue, digits=4)
      comp.res.display$Mean.Diff.CI = paste(comp.res.display$Mean.Diff, " (", comp.res.display$Mean.Diff.Lo, " , ",
                                            comp.res.display$Mean.Diff.Up, ")", sep="")
      comp.res.display = comp.res.display[,c("Mean.Diff.CI","lm.pvalue" )]


      header = data.frame(Response=yvar.display, Predictor=xvar.display, Direction=dir, Cutoff=cutoff)
      comp.res = cbind(header, comp.res)
      comp.res.display = cbind(header, comp.res.display)

      res = list(comp.res=comp.res, comp.res.display=comp.res.display)

    }else if(type=="b"){

      if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")

      xvars.adj.fml = paste(xvars.adj,collapse="+")
      fml = stats::as.formula(paste(yvar,"~ Group +",xvars.adj.fml))
      glm.fit = try(glm(fml, family=stats::binomial, data=data), silent=T)
      if(!inherits(glm.fit,"try-error")){
        log.OR = summary(glm.fit)$coefficients["Group", "Estimate"]
        log.OR.lower = confint(glm.fit)["Group","2.5 %"]
        log.OR.upper = confint(glm.fit)["Group","97.5 %"]
        pval = summary(glm.fit)$coefficients["Group", "Pr(>|z|)"]
      }else{
        log.OR = as.numeric(NA)
        log.OR.lower = as.numeric(NA)
        log.OR.upper = as.numeric(NA)
        pval = as.numeric(NA)

      }

      comp.res = data.frame(log.OR = log.OR, log.OR.Lo = log.OR.lower, log.OR.Up = log.OR.upper,
                            glm.pvalue = pval)
      comp.res.display = comp.res
      comp.res.display[,c("log.OR", "log.OR.Lo", "log.OR.Up")] = round(comp.res.display[,c("log.OR", "log.OR.Lo", "log.OR.Up")], digits=2)
      comp.res.display$glm.pvalue= round(comp.res.display$glm.pvalue, digits=4)
      comp.res.display$log.OR.CI = paste(comp.res.display$log.OR, " (", comp.res.display$log.OR.Lo, " , ",
                                         comp.res.display$log.OR.Up, ")", sep="")
      comp.res.display = comp.res.display[,c("log.OR.CI","glm.pvalue" )]

      header = data.frame(Response=yvar.display, Predictor=xvar.display, Direction=dir, Cutoff=cutoff)
      comp.res = cbind(header, comp.res)
      comp.res.display = cbind(header, comp.res.display)

      res = list(comp.res=comp.res, comp.res.display=comp.res.display)

    }else{
      stop("type can only be c, b or s.")
    }


  }

  res

}


#' Summarize Categorical Variables in Subgroup
#'
#' @description This function provides a summary of categorical variables in a dataset.
#'
#' @param yvar Name of the variable for summary.
#' @param yname A vector of ordered y values.
#' @param xvars Names of the variables for grouping.
#' @param xname.list A list (same order as xvars) of ordered x values for each xvar.
#' @param data The dataset.
#' @param yvar.display Display name for yvar.
#' @param xvars.display Display name for xvars.
#' @return A list containing the contingency table, frequency table, and percentage table.
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   outcome = sample(c("A", "B", "C"), 100, replace = TRUE),  # categorical outcome
#'   group1 = sample(c("Male", "Female"), 100, replace = TRUE),  # group variable 1
#'   group2 = sample(c("Young", "Old"), 100, replace = TRUE)  # group variable 2
#' )
#'
#' # Summarize categorical outcome by two grouping variables
#' cat_summary(
#'   yvar = "outcome",
#'   yname = c("A", "B", "C"),  # ordered categories for outcome
#'   xvars = c("group1", "group2"),
#'   xname.list = list(c("Male", "Female"), c("Young", "Old")),
#'   data = data,
#'   yvar.display = "Outcome Category",
#'   xvars.display = c("Gender", "Age Group")
#' )

cat_summary = function(yvar, yname, xvars, xname.list, data, yvar.display=yvar, xvars.display=xvars){
  #yvar: name of the variable for summary
  #yname: a vector of ordered y values
  #xvars: names of the variables for grouping
  #xname.list: a list (same order as xvars) of ordered x values for each xvar
  #yvar.display: display name for yvar
  #xvars.display: displayname for xvars

  if(!is.list(xname.list)) stop("xname.list has to be a list.")
  data = data[,c(yvar, xvars)]
  data = data[stats::complete.cases(data),]
  yname = unique(yname)
  if(!all(yname%in%data[,yvar]) | !all(data[,yvar]%in%yname))
    warning("yname should be the values for the yvar variable.")
  data[,yvar] = factor(data[,yvar], levels=yname)

  if(length(xvars)!=length(xname.list)) stop("xvars and xname.list do not have the same length.")
  for(i in 1:length(xvars)){
    xname.list[[i]]=unique(xname.list[[i]])
    if(!all(xname.list[[i]]%in%data[,xvars[i]]) | !all(data[,xvars[i]]%in%xname.list[[i]]))
      warning("xname should be the values for the xvar variable.")
    data[,xvars[i]] = factor(data[,xvars[i]], levels=xname.list[[i]])
  }

  data = data[,c(xvars,yvar)]

  freq_table = table(data)
  perc_table = round(prop.table(freq_table,margin=1:length(xvars)), digits=3)*100
  freq_table = stats::ftable(freq_table)
  perc_table = stats::ftable(perc_table)

  perc_table.display = matrix(paste("(",perc_table,"%)", sep=""), nrow=nrow(perc_table))
  cont.display = matrix(paste(freq_table, perc_table.display), nrow=nrow(freq_table))
  cont.display = as.data.frame(cont.display, stringsAsFactors = F)
  names(cont.display) = paste(yvar.display, "=", yname)

  header = expand.grid(xname.list[length(xname.list):1], stringsAsFactors=F)[length(xname.list):1]
  names(header) = xvars.display

  cont.display = cbind(header, cont.display)
  list(cont.display=cont.display, freq_table=freq_table, perc_table=perc_table)

}



#' Subgroup Performance Evaluation for Predictive Cases
#'
#' @description This function evaluates the performance of subgroups based on different types of response variables in predictive cases.
#'
#' @param yvar Response variable name.
#' @param censorvar Censoring variable name (0-censored, 1-event).
#' @param grpvar Subgroup variable name.
#' @param grpname A vector of ordered subgroup names (values in the column of grpvar).
#' @param trtvar Treatment variable name.
#' @param trtname A vector of ordered treatment names (values in the column of trtvar).
#' @param xvars.adj Other covariates to adjust when evaluating the performance.
#' @param data The dataset.
#' @param type "c" for continuous; "s" for "survival", and "b" for binary.
#' @param yvar.display Display name of the response variable.
#' @param grpvar.display Display name of the group variable.
#' @param trtvar.display Display name of the treatment variable.
#' @return A list containing the comparison results, group results, and possibly a plot.
#' @importFrom car Anova
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   response = rnorm(100, mean = 10, sd = 5),  # continuous response
#'   survival_time = rexp(100, rate = 0.1),  # survival time
#'   status = sample(c(0, 1), 100, replace = TRUE),  # censoring status
#'   group = sample(c("Low", "Medium", "High"), 100, replace = TRUE),  # subgroup variable
#'   treatment = sample(c("A", "B"), 100, replace = TRUE)  # treatment variable
#' )
#'
#' # Subgroup performance evaluation for predictive cases - survival analysis
#' subgrp_perf_pred(
#'   yvar = "survival_time",
#'   censorvar = "status",
#'   grpvar = "group",
#'   grpname = c("Low", "Medium", "High"),
#'   trtvar = "treatment",
#'   trtname = c("A", "B"),
#'   data = data,
#'   type = "s",
#'   yvar.display = "Survival Time",
#'   grpvar.display = "Risk Group",
#'   trtvar.display = "Treatment"
#' )
#'
#' # Subgroup performance evaluation for predictive cases - continuous outcome
#' subgrp_perf_pred(
#'   yvar = "response",
#'   grpvar = "group",
#'   grpname = c("Low", "Medium", "High"),
#'   trtvar = "treatment",
#'   trtname = c("A", "B"),
#'   data = data,
#'   type = "c",
#'   yvar.display = "Response",
#'   grpvar.display = "Risk Group",
#'   trtvar.display = "Treatment"
#' )
#'
#' # Subgroup performance evaluation for predictive cases - binary outcome
#' data$binary_response <- sample(c(0, 1), 100, replace = TRUE)
#' subgrp_perf_pred(
#'   yvar = "binary_response",
#'   grpvar = "group",
#'   grpname = c("Low", "Medium", "High"),
#'   trtvar = "treatment",
#'   trtname = c("A", "B"),
#'   data = data,
#'   type = "b",
#'   yvar.display = "Binary Response",
#'   grpvar.display = "Risk Group",
#'   trtvar.display = "Treatment"
#' )

subgrp_perf_pred = function(yvar, censorvar=NULL, grpvar, grpname, trtvar, trtname, xvars.adj=NULL, data,
                            type, yvar.display=yvar, grpvar.display=grpvar, trtvar.display=trtvar){
  Group=NULL
  y=NULL
  Trt=NULL
  grp=NULL
  Rate=NULL
  trt=NULL
  Rate.Lo=NULL
  Rate.Up=NULL
  data = data[, c(yvar,censorvar,grpvar,trtvar,xvars.adj)]
  data = data[stats::complete.cases(data),]

  grpname = unique(grpname)
  if(!all(grpname%in%data[,grpvar]) | !all(data[,grpvar]%in%grpname))
    stop("grpname should be the values for the grpvar variable.")

  trtname = unique(trtname)
  if(!all(trtname%in%data[,trtvar]) | !all(data[,trtvar]%in%trtname))
    stop("trtname should be the values for the trtvar variable.")

  data[,grpvar] = factor(data[,grpvar], levels=grpname)
  data[,trtvar] = factor(data[,trtvar], levels=trtname)

  unicomb1 = paste(rep(trtvar.display, length(trtname)*length(grpname)),"=",
                   rep(trtname, each=length(grpname)), ",",
                   rep(grpvar.display, length(trtname)*length(grpname)),"=",
                   rep(grpname, length(trtname)), sep="")
  datacomb1 = paste(trtvar.display,"=",data[,trtvar], ",",
                    grpvar.display,"=", data[,grpvar], sep="")
  unicomb1 = unicomb1[unicomb1%in%datacomb1]

  unicomb2 = paste(rep(trtname, each=length(grpname)), ",",
                   rep(grpname, length(trtname)), sep="")
  datacomb2 = paste(data[,trtvar], ",", data[,grpvar], sep="")
  unicomb2 = unicomb2[unicomb2%in%datacomb2]


  if(type=="s"){
    if(is.null(censorvar)) stop("Censoring variable missing!")

    #coxph
    if(is.null(xvars.adj)){
      xvars.adj.fml=NULL
    }else{
      xvars.adj.fml = paste(c("",xvars.adj),collapse="+")
    }

    fml = stats::as.formula(paste("Surv(", yvar, ",", censorvar,") ~ ", trtvar, "*",grpvar, xvars.adj.fml, sep=""))
    res.cox = try(Anova(coxph(fml, data=data), type=3, test.statistic="Wald"), silent=T)
    if(!inherits(res.cox,"try-error")){
      pval.pred = res.cox[paste(trtvar,":",grpvar,sep=""),"Pr(>Chisq)"]
      pval.prog = res.cox[grpvar, "Pr(>Chisq)"]

    }else{
      pval.pred = as.numeric(NA)
      pval.prog = as.numeric(NA)
    }

    comp.res = data.frame(pvalue.pred = pval.pred, pvalue.prog=pval.prog)
    comp.res.display = comp.res
    comp.res.display= round(comp.res.display, digits=4)

    header = data.frame(Response=yvar.display, Group.Variable=grpvar.display, Test="Cox ANOVA")
    comp.res = cbind(header, comp.res)
    comp.res.display = cbind(header, comp.res.display)


    #median/mean survival time
    fml = stats::as.formula(paste("Surv(", yvar, ",", censorvar,") ~ ", trtvar, "+",grpvar, sep=""))
    sfit = survfit(fml, data = data, conf.type="log-log")
    sfit.res = as.data.frame(summary(sfit, rmean="common")$table[,c("n.start","events", "rmean", "se(rmean)",
                                                                    "median","0.95LCL","0.95UCL")])
    sfit.res$rmean.lower = sfit.res[,"rmean"]-1.96*sfit.res[,"se(rmean)"]
    sfit.res$rmean.upper = sfit.res[,"rmean"]+1.96*sfit.res[,"se(rmean)"]
    sfit.res = sfit.res[, c("n.start","events", "rmean","rmean.lower", "rmean.upper",
                            "median","0.95LCL","0.95UCL")]
    names(sfit.res) = c("N", "Events", "Mean.Surv", "Mean.Surv.Lo", "Mean.Surv.Up", "Med.Surv", "Med.Surv.Lo", "Med.Surv.Up")
    row.names(sfit.res) = unicomb1

    sfit.res.display = sfit.res
    sfit.res.display = round(sfit.res.display, digits=2)
    sfit.res.display$Mean.Surv.CI = paste(sfit.res.display$Mean.Surv, " (", sfit.res.display$Mean.Surv.Lo, " , ",
                                          sfit.res.display$Mean.Surv.Up, ")", sep="")
    sfit.res.display$Med.Surv.CI = paste(sfit.res.display$Med.Surv, " (", sfit.res.display$Med.Surv.Lo, " , ",
                                         sfit.res.display$Med.Surv.Up, ")", sep="")
    sfit.res.display = sfit.res.display[,c("N", "Events", "Mean.Surv.CI", "Med.Surv.CI")]

    sfit.res = cbind(data.frame(Response=yvar.display), sfit.res)
    sfit.res.display = cbind(data.frame(Response=yvar.display), sfit.res.display)

    #plot
    fig = ggsurvplot(surv_fit(fml, data = data, conf.type="log-log"), data = data, risk.table = TRUE,
                     surv.median.line="none", pval=F, pval.method=F,  xlab=yvar.display,
                     legend.title=paste(trtvar.display,",", grpvar.display),
                     legend.labs = unicomb2, font.legend= 14,
                     title=paste("KM Curves for", yvar.display))


    res = list(comp.res=comp.res, comp.res.display=comp.res.display,
               group.res=sfit.res, group.res.display=sfit.res.display, fig=fig)

  }else if(type=="c"){
    if(is.null(xvars.adj)){
      xvars.adj.fml=NULL
    }else{
      xvars.adj.fml = paste(c("",xvars.adj),collapse="+")
    }

    fml = stats::as.formula(paste(yvar, " ~ ", trtvar, "*",grpvar, xvars.adj.fml, sep=""))
    lm.fit = try(Anova(stats::lm(fml, data=data), type=3), silent=T)
    if(!inherits(lm.fit,"try-error")){
      pval.pred = lm.fit[paste(trtvar,":",grpvar,sep=""),"Pr(>F)"]
      pval.prog = lm.fit[grpvar, "Pr(>F)"]

    }else{
      pval.pred = as.numeric(NA)
      pval.prog = as.numeric(NA)
    }

    comp.res = data.frame(pvalue.pred = pval.pred, pvalue.prog=pval.prog)
    comp.res.display = comp.res
    comp.res.display= round(comp.res.display, digits=4)

    header = data.frame(Response=yvar.display, Group.Variable=grpvar.display, Test="LM ANOVA")
    comp.res = cbind(header, comp.res)
    comp.res.display = cbind(header, comp.res.display)


    ## group res
    group.res = NULL
    for(unicomb2.i in unicomb2){
      datai = data[datacomb2%in%unicomb2.i,]
      datai.N = dim(datai)[1]
      datai.t = try(stats::t.test(datai[,yvar], alternative = "two.sided", mu = 0, conf.level = 0.95), silent=T)

      if(!inherits(datai.t,"try-error")){
        datai.mean = unname(datai.t$estimate)
        datai.lower = datai.t$conf.int[1]
        datai.upper = datai.t$conf.int[2]
        datai.median = stats::median(datai[,yvar])
      }else{
        datai.mean = mean(datai[,yvar])
        datai.lower = as.numeric(NA)
        datai.upper = as.numeric(NA)
        datai.median = stats::median(datai[,yvar])
      }

      datai.res = data.frame(Response=yvar.display, N=datai.N, Mean=datai.mean, Mean.Lo=datai.lower,
                             Mean.Up=datai.upper, Median=datai.median)
      row.names(datai.res) = paste(trtvar.display,",",grpvar.display," = ",unicomb2.i, sep="")

      group.res = rbind(group.res, datai.res)
    }

    group.res.display = group.res
    group.res.display[,c("Mean", "Mean.Lo", "Mean.Up", "Median")] = round(group.res.display[,c("Mean", "Mean.Lo", "Mean.Up", "Median")], digits=2)
    group.res.display$Mean.CI = paste(group.res.display$Mean, " (", group.res.display$Mean.Lo, " , ", group.res.display$Mean.Up, ")")
    group.res.display = group.res.display[,c("Response","N", "Mean.CI", "Median")]

    #plot
    data.plot = data[, c(yvar,grpvar,trtvar)]
    names(data.plot) = c("y", "Group", "Trt")

    fig = ggplot(data.plot, aes(x=Group, y=y, fill=Trt)) +
      geom_boxplot(outlier.shape=NA, outlier.colour=NA, position=position_dodge(0.8)) +
      stat_summary(fun=mean, geom="point", shape=5, size=3, position=position_dodge(0.8)) +
      geom_point(size=1.8, position=position_jitterdodge(jitter.width=0.13, dodge.width =0.8))+
      labs(x=grpvar.display, y=yvar.display, fill=trtvar.display)+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))


    res = list(comp.res=comp.res, comp.res.display=comp.res.display,
               group.res=group.res, group.res.display=group.res.display, fig=fig)



  }else if(type=="b"){
    if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")

    if(is.null(xvars.adj)){
      xvars.adj.fml=NULL
    }else{
      xvars.adj.fml = paste(c("",xvars.adj),collapse="+")
    }

    fml = stats::as.formula(paste(yvar, " ~ ", trtvar, "*",grpvar, xvars.adj.fml, sep=""))
    glm.fit = try(Anova(glm(fml, family=stats::binomial, data=data), type=3, test.statistic="Wald"), silent=T)

    if(!inherits(glm.fit,"try-error")){
      pval.pred = glm.fit[paste(trtvar,":",grpvar,sep=""),"Pr(>Chisq)"]
      pval.prog = glm.fit[grpvar, "Pr(>Chisq)"]

    }else{
      pval.pred = as.numeric(NA)
      pval.prog = as.numeric(NA)
    }

    comp.res = data.frame(pvalue.pred = pval.pred, pvalue.prog=pval.prog)
    comp.res.display = comp.res
    comp.res.display= round(comp.res.display, digits=4)

    header = data.frame(Response=yvar.display, Group.Variable=grpvar.display, Test="GLM ANOVA")
    comp.res = cbind(header, comp.res)
    comp.res.display = cbind(header, comp.res.display)

    #Group Res
    group.res = NULL
    for(unicomb2.i in unicomb2){
      datai = data[datacomb2%in%unicomb2.i,]
      datai.N = dim(datai)[1]
      datai.n = sum(datai[,yvar])
      datai.rr = mean(datai[,yvar])
      datai.rrlo = scoreci(x=sum(datai[,yvar]), n=datai.N, conf.level = 0.95)$conf.int[1]
      datai.rrup = scoreci(x=sum(datai[,yvar]), n=datai.N, conf.level = 0.95)$conf.int[2]

      datai.res = data.frame(Response=yvar.display, Treatment=datai[1,trtvar],
                             Group=datai[1,grpvar], n=datai.n, N=datai.N, Rate=datai.rr,
                             Rate.Lo=datai.rrlo, Rate.Up=datai.rrup)
      names(datai.res) = c("Response", trtvar.display, grpvar.display, "n", "N", "Rate",
                           "Rate.Lo", "Rate.Up")
      group.res = rbind(group.res, datai.res)
    }

    group.res.display = group.res
    group.res.display[,c("Rate", "Rate.Lo", "Rate.Up")] = round(group.res.display[,c("Rate", "Rate.Lo", "Rate.Up")], digits=3)
    group.res.display$Rate.CI = paste(group.res.display$Rate*100, "% (", group.res.display$Rate.Lo*100, "%",
                                      ", ",group.res.display$Rate.Up*100, "%)", sep="")
    group.res.display = group.res.display[,c("Response", trtvar.display, grpvar.display, "n", "N", "Rate.CI")]


    ##PLOT
    data.plot = group.res[,c(trtvar.display, grpvar.display, "Rate",  "Rate.Lo", "Rate.Up")]
    names(data.plot) = c("trt", "grp", "Rate",  "Rate.Lo", "Rate.Up")
    fig = ggplot(data.plot, aes(x=grp, y=Rate, fill=trt)) +
      geom_bar(position=position_dodge(0.9), stat="identity") +
      geom_errorbar(aes(ymin=Rate.Lo, ymax=Rate.Up),width=.2, position=position_dodge(.9))+
      labs(x=grpvar.display, y=yvar.display, fill=trtvar.display)+
      theme_bw(base_size=15)+
      theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))

    res = list(comp.res=comp.res, comp.res.display=comp.res.display,
               group.res=group.res, group.res.display=group.res.display, fig=fig)

  }else{
    stop("type can only be c, b or s.")
  }

}

#' Subgroup Performance Evaluation for Prognostic Cases
#'
#' This function evaluates subgroup performance based on different types of response variables.
#'
#' @param yvar The response variable name.
#' @param censorvar (Optional) The censoring variable name (0-censored, 1-event).
#' @param grpvar The subgroup variable name.
#' @param grpname A vector of ordered subgroup names (values in the column of grpvar).
#' @param xvars.adj (Optional) Other covariates to adjust when evaluating the performance.
#' @param data The dataset containing the variables.
#' @param type The type of response variable: "c" for continuous, "s" for survival, and "b" for binary.
#' @param yvar.display Display name of the response variable.
#' @param grpvar.display Display name of the group variable.
#'
#' @return A list containing subgroup performance results including logrank p-value, median and mean survival, Cox model p-value, ANOVA p-value, and more based on the specified response variable type.
#' @importFrom onewaytests welch.test
#' @importFrom car Anova
#' @export
#' @examples
#' # Load a sample dataset
#' data <- data.frame(
#'   survival_time = rexp(100, rate = 0.1),  # survival time
#'   status = sample(c(0, 1), 100, replace = TRUE),  # censoring status
#'   group = sample(c("Low", "Medium", "High"), 100, replace = TRUE),  # subgroup variable
#'   covariate = rnorm(100, mean = 50, sd = 10)  # an additional covariate
#' )
#'
#' # Perform subgroup performance evaluation for survival analysis
#' subgrp_perf(
#'   yvar = "survival_time",
#'   censorvar = "status",
#'   grpvar = "group",
#'   grpname = c("Low", "Medium", "High"),
#'   data = data,
#'   type = "s",
#'   yvar.display = "Survival Time",
#'   grpvar.display = "Risk Group"
#' )
#'
#' # Perform subgroup performance evaluation for continuous outcome
#' data$continuous_outcome <- rnorm(100, mean = 10, sd = 5)
#' subgrp_perf(
#'   yvar = "continuous_outcome",
#'   grpvar = "group",
#'   grpname = c("Low", "Medium", "High"),
#'   data = data,
#'   type = "c",
#'   yvar.display = "Continuous Outcome",
#'   grpvar.display = "Risk Group"
#' )
#'
#' # Perform subgroup performance evaluation for binary outcome
#' data$binary_outcome <- sample(c(0, 1), 100, replace = TRUE)
#' subgrp_perf(
#'   yvar = "binary_outcome",
#'   grpvar = "group",
#'   grpname = c("Low", "Medium", "High"),
#'   data = data,
#'   type = "b",
#'   yvar.display = "Binary Outcome",
#'   grpvar.display = "Risk Group"
#' )

subgrp_perf = function(yvar, censorvar=NULL, grpvar, grpname, xvars.adj=NULL, data, type, yvar.display=yvar, grpvar.display=grpvar){
  Group=NULL
  y=NULL

  if(is.null(xvars.adj)){
    data = data[, c(yvar,censorvar,grpvar)]
    data = data[stats::complete.cases(data),]
    grpname = unique(grpname)
    if(!all(grpname%in%data[,grpvar]) | !all(data[,grpvar]%in%grpname))
      stop("grpname should be the values for the grpvar variable.")

    data[,grpvar] = factor(data[,grpvar], levels=grpname)

    if(type=="s"){
      if(is.null(censorvar)) stop("Censoring variable missing!")

      #logrank
      fml = stats::as.formula(paste("Surv(", yvar, ",", censorvar,") ~ ",grpvar))
      sdf = try(survdiff(fml, data=data, rho = 0),silent=T)
      if(!inherits(sdf,"try-error")){
        pval.logrank = 1 - stats::pchisq(sdf$chisq, length(sdf$n)-1)
      }else{
        pval.logrank = as.numeric(NA)
      }

      comp.res = data.frame(logrank.pvalue = pval.logrank)
      comp.res.display = comp.res
      comp.res.display$logrank.pvalue = round(comp.res.display$logrank.pvalue, digits=4)

      header = data.frame(Response=yvar.display, Group.Variable=grpvar.display)
      comp.res = cbind(header, comp.res)
      comp.res.display = cbind(header, comp.res.display)


      #median/mean survival time
      sfit = survfit(fml, data = data, conf.type="log-log")
      sfit.res = as.data.frame(summary(sfit, rmean="common")$table[,c("n.start","events", "rmean", "se(rmean)",
                                                                      "median","0.95LCL","0.95UCL")])
      sfit.res$rmean.lower = sfit.res[,"rmean"]-1.96*sfit.res[,"se(rmean)"]
      sfit.res$rmean.upper = sfit.res[,"rmean"]+1.96*sfit.res[,"se(rmean)"]
      sfit.res = sfit.res[, c("n.start","events", "rmean","rmean.lower", "rmean.upper",
                              "median","0.95LCL","0.95UCL")]
      names(sfit.res) = c("N", "Events", "Mean.Surv", "Mean.Surv.Lo", "Mean.Surv.Up", "Med.Surv", "Med.Surv.Lo", "Med.Surv.Up")
      row.names(sfit.res) = paste(grpvar.display,"=",grpname, sep=" ")


      sfit.res.display = sfit.res
      sfit.res.display = round(sfit.res.display, digits=2)
      sfit.res.display$Mean.Surv.CI = paste(sfit.res.display$Mean.Surv, " (", sfit.res.display$Mean.Surv.Lo, " , ",
                                            sfit.res.display$Mean.Surv.Up, ")", sep="")
      sfit.res.display$Med.Surv.CI = paste(sfit.res.display$Med.Surv, " (", sfit.res.display$Med.Surv.Lo, " , ",
                                           sfit.res.display$Med.Surv.Up, ")", sep="")
      sfit.res.display = sfit.res.display[,c("N", "Events", "Mean.Surv.CI", "Med.Surv.CI")]

      sfit.res = cbind(data.frame(Response=yvar.display), sfit.res)
      sfit.res.display = cbind(data.frame(Response=yvar.display), sfit.res.display)

      #plot
      fig = ggsurvplot(surv_fit(fml, data = data, conf.type="log-log"), data = data, risk.table = TRUE,
                       surv.median.line="none", pval=F, pval.method=F,  xlab=yvar.display,
                       legend.title=grpvar.display, legend.labs = grpname,
                       font.legend= 14, title=paste(yvar.display, "~", grpvar.display))


      res = list(comp.res=comp.res, comp.res.display=comp.res.display, group.res=sfit.res, group.res.display=sfit.res.display, fig=fig)


    }else if(type =="c"){
      fml = stats::as.formula(paste(yvar, " ~ ",grpvar))
      welchtest = try(welch.test(fml, data=data, rate = 0, alpha = 0.05, na.rm = TRUE, verbose = F), silent=T)
      kwtest = try(stats::kruskal.test(fml, data=data), silent=T)

      if(!inherits(welchtest,"try-error")){
        pval.welch = welchtest$p.value
      }else{
        pval.welch = as.numeric(NA)
      }

      if(!inherits(kwtest,"try-error")){
        pval.kw = kwtest$p.value
      }else{
        pval.kw = as.numeric(NA)
      }

      comp.res = data.frame(Response=yvar.display, Group.Variable=grpvar.display,
                            Welch.pvalue=pval.welch, KW.pvalue = pval.kw)
      comp.res.display = comp.res
      comp.res.display[,c("Welch.pvalue","KW.pvalue")] = round(comp.res.display[,c("Welch.pvalue","KW.pvalue")], digits=4)


      group.res = NULL
      for(grp.i in grpname){
        datai = data[data[,grpvar]%in%grp.i,]
        datai.N = dim(datai)[1]
        datai.t = try(stats::t.test(datai[,yvar], alternative = "two.sided", mu = 0, conf.level = 0.95), silent=T)

        if(!inherits(datai.t,"try-error")){
          datai.mean = unname(datai.t$estimate)
          datai.lower = datai.t$conf.int[1]
          datai.upper = datai.t$conf.int[2]
          datai.median = stats::median(datai[,yvar])
        }else{
          datai.mean = mean(datai[,yvar])
          datai.lower = as.numeric(NA)
          datai.upper = as.numeric(NA)
          datai.median = stats::median(datai[,yvar])
        }

        datai.res = data.frame(Response=yvar.display, N=datai.N, Mean=datai.mean, Mean.Lo=datai.lower,
                               Mean.Up=datai.upper, Median=datai.median)
        row.names(datai.res) = paste(grpvar.display,"=",grp.i, sep=" ")

        group.res = rbind(group.res, datai.res)
      }

      group.res.display = group.res
      group.res.display[,c("Mean", "Mean.Lo", "Mean.Up", "Median")] = round(group.res.display[,c("Mean", "Mean.Lo", "Mean.Up", "Median")], digits=2)
      group.res.display$Mean.CI = paste(group.res.display$Mean, " (", group.res.display$Mean.Lo, " , ", group.res.display$Mean.Up, ")")
      group.res.display = group.res.display[,c("Response","N", "Mean.CI", "Median")]


      #plot
      data.plot = data[,c(yvar,grpvar)]
      names(data.plot) = c("y", "Group")

      fig = ggplot(data.plot, aes(x=Group, y=y)) + geom_boxplot(outlier.shape=NA) +
        stat_summary(fun=mean, geom="point", shape=5, size=4) +
        geom_point(size=2.5, position=position_jitter(width=0.18,height=0))+
        ylab(yvar.display)+xlab(grpvar.display)+
        theme_bw(base_size=15)+
        theme(text=element_text(size=15), axis.text=element_text(size=15),legend.text=element_text(size=15))

      res = list(comp.res=comp.res, comp.res.display=comp.res.display, group.res=group.res, group.res.display=group.res.display, fig=fig)


    }else if(type=="b"){
      if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")

      data[,yvar] = factor(data[,yvar], levels=c(0,1))
      cont = table(data[,grpvar], data[,yvar])
      colnames(cont) = paste(yvar.display, "=", colnames(cont))
      rownames(cont) = paste(grpvar.display, "=", rownames(cont))

      fish.p = stats::fisher.test(x=data[,grpvar], y=data[,yvar])$p.value

      comp.res =  data.frame(Response = yvar.display, Group.Variable=grpvar.display,
                             Fisher.pvalue = fish.p)
      comp.res.display = comp.res
      comp.res.display$Fisher.pvalue = round(comp.res.display$Fisher.pvalue, digits=4)

      temp.table = matrix(paste("(",round(cont/rowSums(cont), digits=3)*100, "%)",sep=""), nrow=dim(cont)[1])
      cont.display = matrix(paste(cont, temp.table), nrow=dim(cont)[1])
      rownames(cont.display) = rownames(cont)
      colnames(cont.display) = colnames(cont)

      res = list(comp.res=comp.res, comp.res.display=comp.res.display,
                 group.res=cont, group.res.display=cont.display)


    }else{
      stop("type can only be c, b or s.")
    }
  }else{
    data = data[, c(yvar,censorvar,grpvar,xvars.adj)]
    data = data[stats::complete.cases(data),]
    grpname = unique(grpname)
    if(!all(grpname%in%data[,grpvar]) | !all(data[,grpvar]%in%grpname))
      stop("grpname should be the values for the grpvar variable.")

    data[,grpvar] = factor(data[,grpvar], levels=grpname)

    if(type=="s"){
      if(is.null(censorvar)) stop("Censoring variable missing!")

      #coxph
      xvars.adj.fml = paste(xvars.adj,collapse="+")
      fml = stats::as.formula(paste("Surv(", yvar, ",", censorvar,") ~ ", grpvar, "+",xvars.adj.fml))
      res.cox = try(Anova(coxph(fml, data=data), type=3, test.statistic="Wald"), silent=T)
      if(!inherits(res.cox,"try-error")){
        pval = res.cox[grpvar,"Pr(>Chisq)"]

      }else{
        pval = as.numeric(NA)
      }

      comp.res = data.frame(cox.anova.pvalue = pval)
      comp.res.display = comp.res
      comp.res.display$cox.anova.pvalue = round(comp.res.display$cox.anova.pvalue, digits=4)

      header = data.frame(Response=yvar.display, Group.Variable=grpvar.display)
      comp.res = cbind(header, comp.res)
      comp.res.display = cbind(header, comp.res.display)

      res = list(comp.res=comp.res, comp.res.display=comp.res.display)


    }else if(type=="c"){
      xvars.adj.fml = paste(xvars.adj,collapse="+")
      fml = stats::as.formula(paste(yvar,"~", grpvar, "+",xvars.adj.fml))
      lm.fit = try(Anova(stats::lm(fml, data=data), type=3), silent=T)

      if(!inherits(lm.fit,"try-error")){
        pval = lm.fit[grpvar, "Pr(>F)"]
      }else{
        pval = as.numeric(NA)
      }

      comp.res = data.frame(Response=yvar.display, Group.Variable=grpvar.display,
                            lm.anova.pvalue = pval)
      comp.res.display = comp.res
      comp.res.display$lm.anova.pvalue = round(comp.res.display$lm.anova.pvalue, digits=4)

      res = list(comp.res=comp.res, comp.res.display=comp.res.display)


    }else if(type=="b"){
      if(!all(data[,yvar]%in%c(0,1,NA)) | !all(c(0,1)%in%data[,yvar])) stop("Response needs to be 0/1.")

      xvars.adj.fml = paste(xvars.adj,collapse="+")
      fml = stats::as.formula(paste(yvar,"~", grpvar, "+",xvars.adj.fml))
      glm.fit = try(Anova(glm(fml, family=stats::binomial, data=data), type=3, test.statistic="Wald"), silent=T)

      if(!inherits(glm.fit,"try-error")){
        pval = glm.fit[grpvar, "Pr(>Chisq)"]
      }else{
        pval = as.numeric(NA)
      }

      comp.res = data.frame(Response=yvar.display, Group.Variable=grpvar.display,
                            glm.anova.pvalue = pval)
      comp.res.display = comp.res
      comp.res.display$glm.anova.pvalue = round(comp.res.display$glm.anova.pvalue, digits=4)

      res = list(comp.res=comp.res, comp.res.display=comp.res.display)



    }else{
      stop("type can only be c, b or s.")
    }

  }

  res
}

Try the BioPred package in your browser

Any scripts or data that you put into this service are public.

BioPred documentation built on Nov. 4, 2024, 9:06 a.m.