R/clpm_gam_c.R
In crosslag: Perform Linear or Nonlinear Cross Lag Analysis

Documented in clpm_gam_c

#' Title Nonlinear Cross Lag Analysis
#'
#' @param xname If nonlinear cross lagged analysis is used between x and y, 'xname' is the name of x
#' @param yname If nonlinear cross lagged analysis is used between x and y, 'yname' is the name of y
#' @param data.x1 A numeric variable.
#' @param data.y1 A numeric variable. 'data.x1' and 'data.y1' comes from the first time point
#' @param data.xt A numeric variable.
#' @param data.yt A numeric variable. 'data.xt' and 'data.yt' comes from the another time point
#'
#' @return A list containing two elements
#' \item{clpm_gam_result}{a dataframe containing the result of nonlinear cross lag analysis}
#' \item{clpm_gam_plot}{a list of plot generated by 'ggplot' and 'ggarrange', you can use 'ggarrange(clpm_gam_plot[[2]])' to plot it}
#' @importFrom ggpubr ggarrange
#' @importFrom mgcv gam
#' @import ggplot2
#' @export
#' @examples
#' data(test_data1)
#' data(test_data2)
#' result <- clpm_gam_c(xname="ASI",yname = "PWRI",
#'                      data.x1 = test_data1$ASI,data.y1 = test_data1$PWRI,
#'                      data.xt = test_data2$ASI,data.yt = test_data2$PWRI)
clpm_gam_c <- function(xname, yname, data.x1,data.y1,data.xt,data.yt){
  #initializes an empty list used to store ggplot objects
  #Each ggplot object will be added to this list as the function progresses, and the final list will be included in the output of the function the results.
  p_list <- list()
  #checks the class of the input variables to ensure they meet the expected data types
  if(!inherits(data.x1,"numeric",which = FALSE))
    stop(" data.x1 must be of class 'numeric' ")
  if(!inherits(data.y1,"numeric",which = FALSE))
    stop(" data.y1 must be of class 'numeric' ")
  if(!inherits(data.xt,"numeric",which = FALSE))
    stop(" data.xt must be of class 'numeric' ")
  if(!inherits(data.yt,"numeric",which = FALSE))
    stop(" data.yt must be of class 'numeric' ")
  if(!inherits(xname,"character",which = FALSE))
    stop(" xname must be of class 'character' ")
  if(!inherits(yname,"character",which = FALSE))
    stop(" yname must be of class 'character' ")
  #create new variables with more concise names for easier reference within the function `clpm_gam_c`
  X1 <- data.x1
  Y1 <- data.y1
  Xt <- data.xt
  Yt <- data.yt
  #fit a Generalized Additive Model (GAM) using the `gam` function from the `mgcv` package.
  fit <- mgcv::gam(Xt ~ s(Y1, k=4) + X1)
  #store the summary of the fitted Generalized Additive Model (GAM) in the variable `s`.
  #This summary typically includes various statistics and information, such as the estimated coefficients, degrees of freedom, p-values, and other diagnostic measures.
  s <- summary(fit)
  #Calculating the residuals of the model fit.
  r <- Xt - predict(fit)
  #calculates the root mean squared residual (rmr) of the model fit.
  #The rmr is a measure of the average discrepancy between the observed values and the values predicted by the model.
  rmr <- sqrt(mean(r^2))
  chidf <- s$chi.sq/s$s.table[1]
  #create a data frame named `result0` with the following columns:
  result0 <- data.frame(lhs="Xt", op="~", rhs="Y1", edf=s$s.table[1], f=s$s.table[3],
                        p=s$s.table[4], x=xname, y=yname, r.sq=s$r.sq, gcv=s$sp.criterion[1],
                        rmr=rmr, chisqdf=chidf)
  #create a ggplot object `p` that represents a scatter plot with a smoothed line generated by a Generalized Additive Model (GAM).
  p <- ggplot2::ggplot(mapping = aes(x = Y1, y = Xt))+
    geom_smooth(mapping = aes(color = "gam"), method = "gam")+
    xlab(paste0(yname))+
    ylab(paste0(xname))+
    theme_bw()+
    theme(legend.position="none")
  #add a new ggplot object `p` to the list `p_list`
  #fit anothor Model (GAM) using the `gam` function from the `mgcv` package.
  #In this model formula, `Yt` is the response variable being modeled as a smooth function of `X1` with a specified degree of smoothness (k=4) and an additional linear term involving `Y1`.
  #This GAM formulation allows for flexible modeling of the relationship between `Yt` and `X1` while incorporating the linear effect of `Y1` in the model.
  fit <- mgcv::gam(Yt ~ s(X1, k=4) + Y1)
  s <- summary(fit)
  r <- Yt - predict(fit)
  rmr <- sqrt(mean(r^2))
  chidf <- s$chi.sq/s$s.table[1]
  result1 <- data.frame(lhs="Yt", op="~", rhs="X1", edf=s$s.table[1], f=s$s.table[3],
                        p=s$s.table[4], x=xname, y=yname, r.sq=s$r.sq, gcv=s$sp.criterion[1],
                        rmr=rmr, chisqdf=chidf)
  # create a ggplot object `p` that represents a scatter plot with a smoothed line generated by a GAM
  p <- ggplot2::ggplot(mapping = aes(x = X1, y = Yt))+
    geom_smooth(mapping = aes(color = "gam"), method = "gam")+
    xlab(paste0(xname))+
    ylab(paste0(yname))+
    theme_bw()+
    theme(legend.position="none")
  p_list <- c(p_list, list(ggarrange(p)))
  # Stack the results from the two GAM model fits vertically into a single data frame for further output.
  rst <- rbind(result0,result1)
  # Modify the values in the columns `lhs` and `rhs` of the data frame `rst` to certain name
  rst$lhs[which(rst$lhs=="Xt")] <- paste0(xname,"_2")
  rst$lhs[which(rst$lhs=="Yt")] <- paste0(yname,"_2")
  rst$rhs[which(rst$rhs=="Y1")] <- paste0(yname,"_1")
  rst$rhs[which(rst$rhs=="X1")] <- paste0(xname,"_1")
  # Set the column `op` to contain the arrow symbol `-->` surrounded by spaces on both sides
  # To indicate a directional relationship from the left-hand side variable to the right-hand side variable.
  rst$op <- '   -->   '
  for (i in 1:nrow(rst)) {
    rst$lhs[i] <- paste0(rst$lhs[i],rst$op[i],rst$rhs[i])
  }
  # Resulte in a modified version of `rst` with those columns excluded.
  rst <- rst[,-c(2,3,4)]
  # Modify the column names of the data frame `rst` to indicate their actual meaning.
  names(rst)[names(rst) =="lhs"] <-"Relation"
  names(rst)[names(rst) =="p"] <-"pvalue"
  # Create a list named `output` that contains two elements: the results from the two GAM model and the plot with a smoothed line generated by a GAM
  output <- list(clpm_gam_result=rst, clpm_gam_plot=p_list)
  # the function will return the list when it is executed, providing the results of the function, including the GAM results and plots
  return(output)
}