### utils-formula.R ---
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: mar 23 2021 (09:41)
## Version:
## Last-Updated: jul 16 2024 (13:41)
## By: Brice Ozenne
## Update #: 340
##----------------------------------------------------------------------
##
### Commentary:
##
### Change Log:
##----------------------------------------------------------------------
##
### Code:
## * addLeading0
##' @description Add leading 0 to a numeric vector
##' @noRd
##' @examples
##' addLeading0(c(1,9,19))
##' addLeading0(c(1,9,100))
##' addLeading0(c(-13,-1,1,13))
addLeading0 <- function(object, as.factor = FALSE, code = NULL){
if(!is.numeric(object)){
stop("Argument \'object\' must be a numeric vector. \n")
}
if(is.null(code)){
n.0 <- ceiling(log10(max(abs(object)))+0.1)
code <- paste0("%0",n.0,"d")
}
out <- sprintf(code, abs(object))
if(any(object<0)){
out[object<0] <- paste0("-",out[object<0])
}
if(as.factor){
out <- as.factor(out)
}
attr(out,"code") <- code
return(out)
}
## * approxAUC (documentation)
##' @title AUC: numerical integral of a function
##' @description Evaluate the Area Under the Curve (AUC) based on discrete observations y = f(x),
##' using interpolation of order 0 (step), 1 (trapezoidal), or 3 (natural cubic splines).
##' The AUC is the integral of a function over an interval [from,to].
##'
##' @param x [numeric vector] x-values.
##' @param y [numeric vector] image of the x-values by the function.
##' @param from [numeric] lower border of the intergration domain
##' @param to [numeric] upper border of the intergration domain
##' @param method [character] the type of interpolation: \code{"trapezoid"}, \code{"step"} or \code{"spline"}.
##' @param subdivisions [integer] number of subdivisions to be used when performing numerical integration of the spline.
##' Only relevant when \code{method="spline"}.
##' @param name [character] how to name the output. Can be set to \code{NULL} or \code{FALSE} to output a numeric without name.
##' @param na.rm [logical] in presence of missing values, should complete.cases of \code{x} and \code{y} will be used?
##'
##' @details This function is a simplified version of the \code{AUC} function from the DescTools package.
##'
##' @return a numeric value.
##'
##' @examples
##' ## same examples as DescTools::AUC
##' approxAUC(x=c(1,3), y=c(1,1), from = 1, to = 3)
##'
##' approxAUC(x=1:3, y=c(1,2,4), from = 1, to = 3)
##' approxAUC(x=1:3, y=c(1,2,4), from = 1, to = 3, method = "step")
##'
##' x <- seq(0, pi, length.out=200)
##' approxAUC(x=x, y=sin(x), from = 0, to = pi)
##' approxAUC(x=x, y=sin(x), from = 0, to = pi, method = "spline")
##'
## * approxAUC (code)
##' @export
approxAUC <- function (x, y, from, to, method = "trapezoid", subdivisions = 100, name = "AUC", na.rm = FALSE){
## ** normalize user input
if (na.rm & (any(is.na(x)) | any(is.na(y)))) {
idx <- stats::complete.cases(cbind(x, y))
x <- x[idx]
y <- y[idx]
}
if (length(x) < 2){
stop("Cannot compute an area under the curve with 2 or less timpoints. \n")
}
if(from < min(x) | to > max(x)){
message("Cannot compute an area under the curve w.r.t. timepoints before the first observation or after the last observation. \n")
}
method <- match.arg(method, choices = c("trapezoid", "step", "spline"))
## ** evalute auc
neworder <- order(x)
x.order <- x[neworder]
y.order <- y[neworder]
if (method == "trapezoid") {
values <- stats::approx(x.order, y.order, xout = sort(unique(c(from, to, x.order[x.order > from & x.order < to]))))
res <- 0.5 * sum(diff(values$x) * (values$y[-1] + values$y[-length(values$y)]))
}else if (method == "step") {
values <- stats::approx(x.order, y.order, xout = sort(unique(c(from, to, x.order[x.order > from & x.order < to]))))
res <- sum(diff(values$x) * values$y[-length(values$y)])
}else if (method == "spline") {
res <- stats::integrate(stats::splinefun(x.order, y.order, method = "natural"), lower = from, upper = to, subdivisions = subdivisions)$value
}
## ** output
if(!is.null(name) && !identical(name,FALSE)){
names(res) <- name
}
return(res)
}
## * countChar
##' @description Count the number of time a character appears
##'
##' @param value [character]
##' @param pattern [character]
##'
##' @noRd
##' @examples
##' countChar("~1|id","|")
##' countChar("~(1|id) + (time|id)","|")
##' countChar("~(1|id) + (time|id)","time")
countChar <- function(value, pattern, fixed = TRUE){
lengths(regmatches(value, gregexpr(pattern, value, fixed = fixed)))
}
## * dchol
##' @title Jacobian of the Cholesky factor
##' @description Jacobian of the Cholesky factor
##' @param object [matrix] matrix relative to which the cholesky factor and its derivative should be evaluated.
##' @param chol [logical] Is the argument object the cholesky factor?
##' @noRd
##' @examples
##' p <- 5
##' Msym <- cov(matrix(rnorm(1000*p),ncol=p))
##' chol(Msym)
##' test <- dchol(Msym)
##' test
##'
##' ## comparison with numerical derivative
##' mychol <- function(x){
##' vec(chol(matrix(x, nrow = sqrt(length(x)), ncol = sqrt(length(x)))))
##' }
##' range(numDeriv::jacobian(mychol, vec(Msym)) - test)
##'
##' ## delta method
##' data(gastricbypassL, package = "LMMstar")
##' e.lmm <- lmm(weight ~ visit + (1|id), data = gastricbypassL)
##'
##' GS <- estimate(e.lmm, transform.sigma = "log", transform.rho = "atanh", function(p){ ## p <- NULL
##' as.vector(chol(sigma(e.lmm, p = p)))
##' }, method.numDeriv = "Richardson")
##' dOmega.chol <- dchol(e.lmm$Omega[[1]], dobject = e.lmm$dOmega[[1]], chol = FALSE)
##'
##' range(dOmega.chol$sigma - matrix(attr(GS,"grad")[,"sigma"],e.lmm$time$n,e.lmm$time$n))
##' range(dOmega.chol[["rho(id)"]] - matrix(attr(GS,"grad")[,"rho(id)"],e.lmm$time$n,e.lmm$time$n))
##'
dchol <- function(object, dobject = NULL, chol = FALSE, ...){
## ** normalize user input
if(!is.matrix(object)){
stop("Argument \'object\' should be a matrix. \n")
}
p <- dim(object)
if(p[1]!=p[2]){
stop("Argument \'object\' should be a squared matrix (same number of rows and columns). \n")
}
p2 <- prod(p)
if(chol){
object.chol <- object
object <- crossprod(object.chol)
}else{
object.chol <- chol(object)
}
if(any(abs(object.chol[lower.tri(object.chol)]>1e-12))){
stop("Argument \'object\' should be the (upper) cholesky factor of a squared symmetric matrix when argument \'chol\' is TRUE. \n")
}
## ** deal with special case
if(p[1]==1){
return(1/(2*object.chol))
}
## ** commutation matrix (K)
## such that vec(C) = K vec(\trans(C))
indexN0.com <- expand.grid(row = 1:p[1], col = 1:p[1])
indexN0.com$comrow <- p[1]*(indexN0.com[,"row"]-1)+indexN0.com[,"col"]
indexN0.com$comcol <- p[1]*(indexN0.com[,"col"]-1)+indexN0.com[,"row"]
## sanity check
## Mcom <- matrix(0, nrow = p2, ncol = p2)
## Mcom[indexN0.com$comrow + (indexN0.com$comcol-1)*p2] <- 1
## range(Mcom - matrixcalc::commutation.matrix(p[1]))
## ** elimination matrix (E)
## such athat vech(dX) = E vec(X)
## [a b b c] --> [a b c]
p.n0 <- p[1]*(p[1]+1)/2
indexN0.elim <- indexN0.com[indexN0.com$row >= indexN0.com$col,,drop=FALSE]
## sanity check
## Melim <- matrix(0, nrow = p.n0, ncol = p2)
## Melim[1:p.n0 + (indexN0.elim$comcol-1)*p.n0] <- 1
## tMelim <- matrix(0, nrow = p2, ncol = p.n0)
## tMelim[indexN0.elim$comcol + (1:p.n0-1)*p2] <- 1
## range(Melim - matrixcalc::elimination.matrix(p[1]))
## range(t(Melim) - tMelim)
## Melim %*% vec(x)
## ** differentiation choleski transform
Minterim <- t(object.chol) %x% diag(1,nrow=p[1])
out <- matrix(0, nrow = p2, ncol = p2)
out[indexN0.elim$comrow,indexN0.elim$comrow] <- solve((Minterim + Minterim[indexN0.com$comrow,])[indexN0.elim$comrow,][,indexN0.elim$comcol])
## SHORT FOR
## https://mathoverflow.net/questions/150427/the-derivative-of-the-cholesky-factor
## GS <- solve(Melim %*% (diag(1,nrow=p2) + Mcom) %*% (t(object.chol) %x% diag(1,nrow=p[1])) %*% tMelim)
## range(GS - solve((Minterim + Minterim[indexN0.com$comrow,])[indexN0.elim$comrow,][,indexN0.elim$comcol]))
## INCOMPLETE JUSTIFICATION
## https://math.stackexchange.com/questions/2158399/derivative-of-symmetric-positive-definite-matrix-w-r-t-to-its-lower-triangular
## X = C\trans{C}
## dX = dC\trans{C} + Cd\trans{C}
##
## use that vec(ABC) = (\trans(C) %x% A) vec(B)
##
## vec(dX) = (C %x% I) vec(dC) + (I %x% C) vec(d\trans{C})
## = [ (C %x% I) + (I %x% C)K ] vec(dC)
## vech(dX) = Ex vec(X) = Ex [ (C %x% I) + (I %x% C)K ] vec(dC)
## = Ex [ (C %x% I) + (I %x% C)K ] Ec vech(dC)
## vech(dX)/vech(dC) = = Ex [ (C %x% I) + (I %x% C)K ] Ec
## ** dobject
if(!is.null(dobject)){
if(is.matrix(dobject)){
dobject <- list(dobject)
}else if(any(unlist(lapply(dobject,is.matrix))==FALSE)){
stop("Argument \'dobject\' should be a matrix or list of matrix. \n")
}
if(any(unlist(lapply(dobject,function(iO){identical(dim(iO),p)}))==FALSE)){
stop("Argument \'dobject\' has dimensions incompatible with argument \'object\'. \n")
}
index.upperTri <- which(upper.tri(object, diag = TRUE))
out.upperTri <- out[index.upperTri,index.upperTri,drop=FALSE]
out <- lapply(dobject, FUN = function(iM){ ## iM <- dobject[[1]]
iOut <- matrix(0, nrow = p[1], ncol = p[2])
iOut[index.upperTri] <- rowSums(sweep(out.upperTri, MARGIN = 2, FUN = "*", STATS = iM[index.upperTri]))
return(iOut)
})
## out <- lapply(dobject, FUN = function(iM){ ## iM <- dobject[[1]]
## iOut <- matrix(rowSums(sweep(out, MARGIN = 2, FUN = "*", STATS = as.double(iM))),
## nrow = p[1], ncol = p[2])
## return(iOut)
## })
}
## ** export
return(out)
}
## * groupSet
##' @title Group Sets
##' @description Find groups of sets sharing the same values
##' @noRd
##'
##' @param object a list
##'
##'
##' @examples
##' mylistA <- list(1:2,3:4,1:5)
##' groupSet(mylistA)
##'
##' mylistB <- list(1:2,2:3,2,3)
##' groupSet(mylistB)
##'
##' mylistC <- list(1:2,1,2,2,3,2:3)
##' groupSet(mylistC, strata = c(1,1,1,2,2,2))
##'
##' mylistD <- list(1:2,1,2,2,3,2:3,4,4:5,6)
##' groupSet(mylistD, strata = c(1,1,1,2,2,2,2,2,3))
groupSet <- function(object, strata = NULL){
n.set <- length(object)
out <- rep(NA, n.set)
## ** handle strata
if(!is.null(strata)){
previous.group <- 0
U.strata <- unique(strata)
n.strata <- length(U.strata)
index.strata <- tapply(1:length(object), strata, identity)
for(iS in 1:n.strata){ ## iS <- 1
iOut <- groupSet(object[index.strata[[iS]]])
out[index.strata[[iS]]] <- iOut + previous.group
previous.group <- previous.group + max(iOut)
}
return(out)
}
## ** initialize with the largest element
order.set <- order(lengths(object), decreasing = TRUE)
out[order.set[1]] <- 1
## ** deal with special case
if(n.set==1){
return(out)
}
## ** find representative patterns
Uvalue <- unique(unlist(object))
M.group <- rbind(Uvalue %in% object[[order.set[1]]])
index.group <- order.set[1]
if(all(M.group)){ ## another special case
out[] <- 1
return(out)
}
for(iSet in order.set[-1]){ ## iSet <- 2
iTest <- Uvalue %in% object[[iSet]]
iContrast <- sweep(M.group, MARGIN = 2, FUN = "-", STATS = iTest)
if(any(rowSums(iContrast!=0)==0)){ ## since patterns are looped over with decreasing length, equality can only mean pattern with fewer elements
out[iSet] <- out[index.group[which(rowSums(iContrast!=0)==0)[1]]]
}else if(any(rowSums(iContrast<0)==0)){ ## subset of another pattern (possibly several but label as first one)
out[iSet] <- out[index.group[which(rowSums(iContrast<0)==0)[1]]]
}else if(any(rowSums(iContrast>0)==0)){ ## contain other patterns
iMerge <- which(rowSums(iContrast>0)==0)
out[c(which(out %in% iMerge),iSet)] <- out[index.group[iMerge[1]]]
index.group[iMerge[1]] <- iSet
M.group[iMerge[1],] <- iTest
if(length(iMerge)>1){ ## handle the case where merging two covaraince structures into a bigger one, e.g. A,B and A,C into A,B,C
index.group <- index.group[-iMerge[-1]]
M.group <- M.group[-iMerge[-1],,drop=FALSE]
out <- as.numeric(as.factor(out))
}
}else{ ## new pattern
out[iSet] <- length(index.group)+1
index.group <- c(index.group, iSet)
M.group <- rbind(M.group, iTest)
}
}
## ** export
return(out)
}
## * is.invertible
##' @description check whether a matrix is invertible
##'
##' @examples
##' M <- matrix(1:9,3,3)
##' is.invertible(M, cov2cor = FALSE)
##' @noRd
is.invertible <- function(object, cov2cor, tol = 10^(-10*sqrt(NCOL(object)))){
if(any(is.na(object))){
return(FALSE)
}
if(cov2cor){
if(any(diag(object)<=0)){
return(FALSE)
}else{
object <- stats::cov2cor(object)
if(any(is.na(object))){
return(FALSE)
}
}
}
return(abs(det(object))>tol)
}
## * ncharTable
##' @description compute the width of a table
##'
##' @param object [data.frame or matrix] table
##' @param digits [integer, >0] digits used to round the numeric value
##' @param format [character] how to display numeric value (100, 1e2, ...). See \code{\link{base::formatC}}.
##' @noRd
##'
##' @examples
##' df <- data.frame(name = c("ah","ohhh"), value = c(101.501,200.510))
##' ncharTable(df) ## formatC(1.1, format = "g")
##' ncharTable(df, digits = 7)
##' ncharTable(df, digits = 1)
##'
##' df2 <- data.frame(name = c("ah","ohhh"), value = c("101.501","200.510"))
##' ncharTable(df2)
##'
ncharTable <- function(object, digits, format = "f"){
if(is.matrix(object)){
if(missing(digits)){
xplus <- cbind(rownames(object),formatC(object, format = format))
}else{
xplus <- cbind(rownames(object),formatC(object, digits = digits, format = format))
}
}else if(is.list(object)){
test.num <- sapply(object,is.numeric)
object.num <- object[test.num]
if(missing(digits)){
object.num <- formatC(as.matrix(object.num), format = format)
}else{
object.num <- formatC(as.matrix(object.num), digits = digits, format = format)
}
object.char <- object[!test.num]
xplus <- cbind(rownames(object))
if(length(object.char)>0){
xplus <- cbind(xplus, object.char)
}
if(length(object.num)>0){
xplus <- cbind(xplus, object.num)
}
}
nchar.colnames <- c(0,nchar(colnames(object)))
width <- apply(xplus,1, function(iRow){ ## iRow <- xplus[1,]
sum(pmax(nchar.colnames,nchar(trimws(iRow)))+1, na.rm = TRUE)-1
})
return(max(width))
}
## * orderLtoR
##' @description order the lines of a matrix according to the column value from left to right
##' @noRd
##'
##' @examples
##' df <- rbind(data.frame(time = c("a","b","c","d"), gender = "M"),
##' data.frame(time = c("a","b","c","d"), gender = "F"))
##'
##' X0 <- unique(model.matrix(~1, df))
##' X1 <- unique(model.matrix(~time, df))
##' X2 <- .model.matrix_regularize(~0+gender + time:gender, data = df, augmodel = TRUE)
##'
##' orderLtoR(X0)
##' orderLtoR(X1)
##' orderLtoR(X2, strata = attr(X2,"M.level")$gender)
##' orderLtoR(X2[8:1,], strata = attr(X2,"M.level")$gender)
orderLtoR <- function(object, strata = NULL){
if(is.null(strata)){
new.order <- rev(1:NCOL(object))
}else{
new.order <- unlist(rev(tapply(colnames(object),strata,rev, simplify = FALSE)))
}
out <- as.numeric(nlme::collapse(object[,new.order,drop=FALSE], sep=""))
## object[order(out),]
return(out)
}
## * triplicated
##' @description identify third (or more) occurence of a value in a vector.
##' @noRd
##' @examples
##' triplicated(c(1:3,1:3))
##' triplicated(c(1:3,1:3,1:3))
##' triplicated(c(NA, 1:3, 3, 4:6, 3, NA, 4))
##' triplicated(c(NA, 1:3, 3, 4:6, 3, NA, 4, 3:4))
triplicated <- function (x){
out <- rep(FALSE,length(x))
index.duplicated <- which(base::duplicated(x))
if(length(index.duplicated)==0){return(out)}
x.duplicated <- x[index.duplicated]
out[index.duplicated[base::duplicated(x.duplicated)]] <- TRUE
return(out)
}
## * tr
##' @description compute the trace
##' @noRd
##'
##' @examples
##' M <- matrix(1:9,3,3)
##' tr(M)
tr <- function(object){
sum(diag(object))
}
## * unorderedPairs
##' @title Form All Pairs
##' @description Form all pairs of values
##' @noRd
##'
##' @param x vector of values
##' @param distinct [logical] should pairs containing the same value be removed?
##'
##' @details adapted from RecordLinkage package
##'
##' @examples
##' unorderedPairs(1, distinct = FALSE)
##' unorderedPairs(1, distinct = TRUE)
##'
##' unorderedPairs(1:5, distinct = TRUE) - utils::combn(5, m = 2)
##' unorderedPairs(1:5, distinct = FALSE)
##' unorderedPairs(rep(1,5), distinct = TRUE) - (utils::combn(5, m = 2)>0)
##'
unorderedPairs <- function(x, distinct = FALSE){
n.x <- length(x)
## work on integers
y <- 1:n.x
out <- do.call(cbind,lapply(1:n.x, function(iK) {
rbind(y[iK], y[iK:n.x])
}))
## remove 'diagonal' pairs (e.g. (1,1) or (2,2))
if(distinct){## same as combn but faster when x is large
if(all(out[1,]==out[2,])){
return(NULL)
}else{
out <- out[,out[1,]!=out[2,],drop=FALSE]
}
}
## restaure original values
out[] <- x[as.vector(out)]
return(out)
}
## * sdiag (copied from the mgcv package)
##' @title Index of Diagonals of a Matrix
##' @description index of the diagonal, sub-diagonal, or super diagonal.
##' @param A [matrix]
##' @param k [integer] type of diagonal to be consider
##' @noRd
##' @examples
##'
##' sdiag(matrix(NA, 3, 3), 1)
sdiag <- function (A, k = 0){
p <- ncol(A)
n <- nrow(A)
if (k > p - 1 || -k > n - 1)
return()
if (k >= 0) {
i <- 1:n
j <- (k + 1):p
}
else {
i <- (-k + 1):n
j <- 1:p
}
if (length(i) > length(j))
i <- i[1:length(j)]
else j <- j[1:length(i)]
return(i + (j - 1) * n)
}
##----------------------------------------------------------------------
### utils-formula.R ends here
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.