R/concurvity_new.R
In gamlss.ggplots: Plotting Functions for Generalized Additive Model for Location Scale and Shape

##################################################################################
##################################################################################
##################################################################################
##################################################################################
# Mikis concurvity 
# What are the arguments?
# data   :is a data frame 
# full   : whether to condition to all the rest of variables or variable wise comparison 
# linear : linear part of the model (not implemented) 
#          If linear is TRUE we will have to separate the linear 
#          and the non-linear part. This will be using the function 
#          getZmatrix(). The linear part will be included in the 
#          beginning to get her woth one It fails to produce anything sensible
#          see function concurvityL() in Concurvity_with_linear_option.R
#--------------------------------------------------------------------------------
# to do
#   i) at the moment there is no data argument 
#  ii) no full option (OK fixed)
# iii) no linear part yest but  vector of ones is added 
#  iv)  the ndx in bbase has to change to something more meaningful
#      maybe nknots
#   v) maybe we should introduce formulea for both X and Linear
#################################################################################
#################################################################################
#################################################################################
#################################################################################
# authors Mikis Stasinopoulos
# created fist Jan 2018 
# revisted by Mikis Dec 2018
# revised on Marcg 2022
#--------------------------------------------------------------
#-----------------------------------------------------------------
get_concurvity <- function(data, full=TRUE,  ...)
{
  #-----------------------------------------------------------------
  #####local function
  #-----------------------------------------------------------------  
  bbasis <- function(x, xl=NULL, xr=NULL, inter=15, deg=3)
  {
    # DS xl= min, xr=max, inter= number of points within 
    # Construct B-spline basis
    tpower <- function(x, t, p) (x - t) ^ p * (x > t)
    xl <- if(is.null(xl)) min(x)
    xr <- if(is.null(xr)) max(x)
    xmax <- xr+0.01*(xr-xl)
    xmin <- xl-0.01*(xr-xl)
    dx <- (xr - xl) / inter # DS increment 
   # if false use Paul's
     
      knots <- seq(xl - deg * dx, xr + deg * dx, by = dx)
      P <- outer(x, knots, tpower, deg)# calculate the power in the knots
      n <- dim(P)[2]
      D <- diff(diag(n), diff = deg + 1) / (gamma(deg + 1) * dx ^ deg) # 
      B <- (-1) ^ (deg + 1) * P %*% t(D) 
    
    return(resid(lm(B~1))) #
  }
################################################################################
################################################################################
################################################################################
  if (missing(data) || NROW(data) <= 1) 
    stop("nothing to do for this model")
    
      dimD <- dim(data)
      daTa <- subset(data, select=ifelse(sapply(data,is.factor)|sapply(data,is.character)==TRUE, FALSE, TRUE))
       Dim <- dim(daTa)
  diffDim  <- dimD[2]-Dim[2]
if (diffDim > 0)
  {
    warning(cat(diffDim, 'factors have been omited from the data', "\n"))
}
  
   cnames <-  names(daTa)
  if (is.null(cnames)) cnames <- as.vector(paste("V", seq(1,NCOL(x)), sep=""))
       n <- dim(daTa)[1]
       m <- dim(daTa)[2]
  if (m==1) {x <- as.matrix(daTa)}
    stop <- start <- rep(1, m) 
  #------------------------------------------------
  for (i in 1:m) # start loop
  {
              B2 <- as.matrix(bbasis(daTa[,i],...))
    colnames(B2) <- as.vector(paste(cnames[i], seq(1,dim(B2)[2]), sep="."))
               B <- if (i==1) B2 else cbind(B, B2, deparse.level=2)
          wherev <- grep(cnames[i],colnames(B))
        start[i] <- wherev[1]
         stop[i] <- wherev[length(wherev)]
  }          # finish loop
              B <- cbind(one=rep(1,dim(B)[1]), B) # add column of ones or possible linear
if (dim(B)[2]>n) stop("the number of colums is greater than n \n use smaller basis i.e. inter=5")        
           start <- c(1, start + 1)
            stop <- c(0, stop + 1)
              m  <- m+1
               B <- B[rowSums(is.na(B)) == 0, ]   # dim(B)
               B <- qr.R(qr(B, tol = 0, LAPACK = FALSE))
      n.measures <- 2
   measure.names <- c("worst",  "estimate")
if (full) 
  {  
    res <- matrix(0, n.measures, m)
    for (i in 1:(m)) 
    {
      Xi <- B[, -(start[i]:stop[i]), drop = FALSE]  #take all the rest
      Xj <- B[,   start[i]:stop[i], drop = FALSE]   #take the current basis
      if (ncol(Xi)>n) stop("the number of colums is greater than n \n use smaller basis i.e. inter=5")
      r <- ncol(Xi)   # the number of colums in the rest of the bases
      # here we take the R of QR of the combined Xi and Xj  but we exclude the Xi part
      # the whole R should have been a pxp matrix (where p is the combined number of bases)
      # but we save only  R pxm (where m is number of columns in the current basis)
      # r is p-m number-of all bases minus number-of current basis 
      R <- qr.R(qr(cbind(Xi, Xj), LAPACK = FALSE, tol = 0))[, -(1:r), drop = FALSE]
      # now we take R (mxp) and create from the  QR of R the triangular Rt (pxp) matrix       
      Rt <- qr.R(qr(R))
      # mikis I am not sure if d[3]^2 is correct it used to be d[1]^2  
      # note that forwardsolve solves Ax=b for x when A is a lower triangular  
      res[1, i] <- svd(forwardsolve(t(Rt), t(R[1:r, ,drop = FALSE])))$d[1]^2
      res[2, i] <- sum(R[1:r, ]^2)/sum(R^2)# fitted sum of sqares / totall sum of squares
    }
    colnames(res) <- c("one", cnames)
    rownames(res) <- measure.names
  } else 
  {
    res <- list()
    for (i in 1:n.measures) res[[i]] <- matrix(1, m, m)
    for (i in 1:m) {
      Xi <- B[, start[i]:stop[i], drop = FALSE]
      r <- ncol(Xi)
      for (j in 1:m) if (i != j) {
        Xj <- B[, start[j]:stop[j], drop = FALSE]
        R <- qr.R(qr(cbind(Xi, Xj), LAPACK = FALSE, tol = 0))[, -(1:r), drop = FALSE]
        Rt <- qr.R(qr(R))
        res[[1]][i, j] <- svd(forwardsolve(t(Rt), t(R[1:r, , drop = FALSE])))$d[1]^2
        #beta <- b$coef[start[j]:stop[j]]
        #conc[[2]][i, j] <- sum((R[1:r, , drop = FALSE] %*% 
        #                         beta)^2)/sum((Rt %*% beta)^2)
        res[[2]][i, j] <- sum(R[1:r, ]^2)/sum(R^2)
        rm(Xj, R, Rt)
      }
    }
    for (i in 1:n.measures) rownames(res[[i]]) <- colnames(res[[i]]) <- c("one", cnames)
    names(res) <- measure.names
  }
  res  
}
##################################################################################
##################################################################################
##################################################################################
##################################################################################
data_concurvity <- function(data, 
                     digits = 2,
                       type = c("estimate", "worst"),
                   diag.off = FALSE,
              lower.tri.off = FALSE, 
                       plot = TRUE,
                     method = c("circle","square"),
              outline.color = "gray",
                     colors = c("blue", "white", "red"),
               legend.title = "Concurvity",
                     title,
                    ggtheme = theme_minimal(),
                     tl.cex = 12,
                     tl.col = "black", 
                     tl.srt = 45,
                        lab = FALSE, 
                    lab_col = "black", 
                   lab_size = 3,
              ...) 
{
  meltit <- function(mat)
  {
    rna <- rownames(mat)
    lrna <- length(rna)
    value <- as.vector(mat)
    Var1 <- gl(length(rna), 1, length = lrna*lrna, labels=rna)
    Var2 <- gl(length(rna), lrna, length = lrna*lrna, labels=rna)
    daf <-  na.omit(data.frame(Var1, Var2, value=value)) 
    daf
  }      
  #require(reshape2)  
         dimD <- dim(data)
         daTa <- subset(data,  select=ifelse(sapply(data,is.factor)|sapply(data,is.character)==TRUE, FALSE, TRUE))
          Dim <- dim(daTa)
     diffDim  <- dimD[2]-Dim[2]
if (diffDim > 0)
  {
    warning(cat(diffDim, 'factors have been omited from the data', "\n"))
}
           type <- match.arg(type)
             CC <- get_concurvity(daTa, full=FALSE, ...)[[type]]
             CC <- base::round(x = CC, digits = digits)
       if ( diag.off) diag(CC) <- NA
       if (lower.tri.off)  CC[lower.tri(CC)]<-NA
      txt.title <- if (missing(title))  
                   paste("Concurvity from data",deparse(substitute(data)))
                   else title  
if (plot==FALSE)  return(CC)
        method <- match.arg(method)
          corr <- meltit(CC)
colnames(corr) <- c("var_1", "var_2", "value")
 corr$abs_corr <- abs(corr$value) * 10
      p <- ggplot(data = corr, 
              mapping = aes_string(x = "var_1", y = "var_2", fill = "value"))
  if (method == "square") {
    p <- p + geom_tile(color = outline.color)
  }
  else if (method == "circle") {
    p <- p + geom_point(color = outline.color, shape = 21, 
                        aes_string(size = "abs_corr")) +
      scale_size(range = c(4, 10)) +
      guides(size = "none")
  }
  label <- round(x = CC, digits = digits)               
  p <- p + scale_fill_gradient2(low = colors[1], high = colors[3], 
                                mid = colors[2], midpoint = 0.5, limit = c(0, 1), space = "Lab",
                                name = legend.title)+ggtitle(txt.title)
  if (class(ggtheme)[[1]] == "function") {
    p <- p + ggtheme()
  }
  else if (class(ggtheme)[[1]] == "theme") {
    p <- p + ggtheme
  }
  p <- p + theme(axis.text.x = element_text(angle = tl.srt, 
                                            vjust = 1, size = tl.cex, hjust = 1), 
                 axis.text.y = element_text(size = tl.cex)) + 
    coord_fixed()
  label <- round(x = corr[, "value"], digits = digits)  
  if (lab) {
    p <- p + ggplot2::geom_text(mapping = aes_string(x = "var_1", 
                                                     y = "var_2"), 
                                label = label, color = lab_col, size = lab_size)
  }
  p
}

##################################################################################
##################################################################################
##################################################################################
##################################################################################

Any scripts or data that you put into this service are public.

gamlss.ggplots documentation built on May 29, 2024, 1:34 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

gamlss.ggplots
Plotting Functions for Generalized Additive Model for Location Scale and Shape

R/concurvity_new.R
In gamlss.ggplots: Plotting Functions for Generalized Additive Model for Location Scale and Shape

Defines functions data_concurvity get_concurvity

Try the gamlss.ggplots package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gamlss.ggplots Plotting Functions for Generalized Additive Model for Location Scale and Shape

R/concurvity_new.R In gamlss.ggplots: Plotting Functions for Generalized Additive Model for Location Scale and Shape

Defines functions data_concurvity get_concurvity

Try the gamlss.ggplots package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

gamlss.ggplots
Plotting Functions for Generalized Additive Model for Location Scale and Shape

R/concurvity_new.R
In gamlss.ggplots: Plotting Functions for Generalized Additive Model for Location Scale and Shape