R/linModelSelect.R

Defines functions linModelSelect

Documented in linModelSelect

#' Test Multiple Starting Levels For Linear Regression Model, Select Best And Plot   
#'
#' The aim of this function is to select the data suiting set of levels on the main input data to construct a linear regression model. 
#' 
#' @details
#' In real world measurements one may be confronted to the case of very low level analytes below the detection limit (LOD) and resulting read-outs fluctuate around around a common baseline (instead of \code{NA}). 
#' With such data it may be preferable to omit the read-outs for the lowest concentrations/levels of analytes if they are spread around a base-line value.
#' This function allows trying to omit all starting levels designed in \code{startLev}, then the resulting p-values for the linear regression slopes will be checked and the best p-value chosen. 
#' The input may also be a MArrayLM-type object from package \href{https://bioconductor.org/packages/release/bioc/html/limma.html}{limma} or from \code{\link{moderTestXgrp}} or \code{\link{moderTest2grp}}.
#' In the graphical representation all points assocoated to levels omitted are shown in light green.
#' For the graphical display additional information can be used : If the  \code{dat} is list or MArrayLM-type object, 
#'  the list-elements $raw (according to argument \code{lisNa} will be used to display points initially given as NA ad imputed lateron in grey.
#' Logarithmic (ie log-linear) data can be treated by settting argument \code{logExpect=TRUE}. Then the levels will be taken as exponent of 2 for the regression, while the original values will be displayed in the figure. 
#'
#' @param rowNa (character or numeric, length=1) if \code{character} rowname for line to be extracted from \code{dat}, if  \code{numeric} used as line-number of \code{dat}
#' @param dat (matrix, list or MArrayLM-object from limma) main input of which columns should get re-ordered, may be output from \code{\link{moderTestXgrp}} or \code{\link{moderTest2grp}}.
#' @param expect (numeric of character) the expected levels; if character, constant unit-characters will be stripped away to extact the numeric content
#' @param logExpect (logical) toggle to \code{TRUE} if the main data are logarithmic but \code{expect} is linear
#' @param startLev (integer) specify all starting levels to test for omitting here (multiple start sites for modelling linear regression may be specified to finally pick the best model)
#' @param lisNa (character) in case \code{dat} is list or MArrayLM-type object, the list-elements with these names will be used as $raw (for indicating initial \code{NA}-values,
#'   $datImp (the main quantitation data to use) and $annot for displaying the corresponding value from the "Accession"-column.	
#' @param plotGraph (logical) display figure
#' @param tit (character) optional custom title
#' @param pch (integer) symbols to use n optional plot; 1st for regular values, 2nd for values not used in regression
#' @param cexLeg (numeric) size of text in legend
#' @param cexSub (numeric) text-size for line (as subtitle) giving regression details of best linear model)
#' @param xLab (character) custom x-axis label
#' @param yLab (character) custom y-axis label
#' @param cexXAxis (character) \code{cex}-type for size of text for x-axis labels
#' @param cexYAxis (character) \code{cex}-type for size of text for y-axis labels
#' @param xLabLas (integer) \code{las}-type orientation of x-axis labels (set to 2 for vertical axix-labels)
#' @param cexLab (numeric) \code{cex}-type for size of text in x & y axis labels (will be passed to \code{cex.lab} in \code{plot()})
#' @param suppressWarnings (logical) suppress warnings from lm() that may appear when NAs get introduced
#' @param silent (logical) suppress messages
#' @param debug (logical) additional messages for debugging
#' @param callFrom (character) allow easier tracking of messages produced
#' @return This function returns a list with $coef (coefficients), $name (as/from input \code{rowNa}), $startLev the best starting level) 
#' @seealso \code{\link{moderTestXgrp}} for single comparisons, \code{\link[base]{order}}  
#' @examples
#' ## Construct data
#' li1 <- rep(c(4,3,3:6),each=3) + round(runif(18)/5,2)
#' names(li1) <- paste0(rep(letters[1:5], each=3), rep(1:3,6))
#' li2 <- rep(c(6,3:7), each=3) + round(runif(18)/5, 2)
#' dat2 <- rbind(P1=li1, P2=li2)
#' exp2 <- rep(c(11:16), each=3)
#' 
#' ## Check & plot for linear model 
#' linModelSelect("P2", dat2, expect=exp2)
#'
#' ## Log-Linear data
#' ## Suppose dat2 is result of measures in log2, but exp4 is not
#' exp4 <- rep(c(3,10,30,100,300,1000), each=3)
#' linModelSelect("P2", dat2, expect=exp4, logE=FALSE)    # bad
#' linModelSelect("P2", dat2, expect=exp4, logE=TRUE)
#' 
#' @export
linModelSelect <- function(rowNa, dat, expect, logExpect=FALSE, startLev=NULL, lisNa=c(raw="raw",annot="annot",datImp="datImp"), plotGraph=TRUE, 
  tit=NULL, pch=c(1,3), cexLeg=0.95, cexSub=0.85, xLab=NULL, yLab=NULL, cexXAxis=0.85, cexYAxis=0.9, xLabLas=1, cexLab=1.1, suppressWarnings=TRUE, silent=FALSE, debug=FALSE, callFrom=NULL)  {
  ##  test for linear models with option to start form multiple later levels (ie omitting some of the early levels)
  fxNa <- .composeCallName(callFrom, newNa="linModelSelect")
  if(!isTRUE(silent)) silent <- FALSE
  if(isTRUE(debug)) {silent <- FALSE; suppressWarnings <- FALSE} else debug <- FALSE

  argNa <- c(deparse(substitute(rowNa)), deparse(substitute(dat)), deparse(substitute(expect)))
  quantCol <- c("blue","tan2","grey")       # figure: 1st for 'used in regression', 2nd for 'not-used', 3rd for 'NA/imputed'
  legLab <- c("used in regresssion","not used","NA/imputed,used","NA/imputed,not used")  # for figure legend ..
  annoColNa <- c("Accession","GeneName")
  doSelect <- TRUE                    # flag to see if input/data are valid
  if(length(rowNa) <1) {warning(fxNa,"Argument 'rowNa=",argNa[1],"' contains emty object, nothing to do !"); doSelect <- FALSE}
  if(length(dat) <1) {warning(fxNa,"Argument 'dat=",argNa[1],"' contains emty object, nothing to do !"); doSelect <- FALSE}
  if(length(expect) <1) {stop(fxNa,"Argument 'expect=",argNa[3],"' contains emty object, nothing to do !"); doSelect <- FALSE
  } else if(is.character(expect)) {
    exp2 <- try(as.numeric(expect), silent=TRUE)
    if(inherits(exp2, "try-error")) {
      exp2 <- try(as.numeric(gsub("[[:alpha:]]|_|\\-","",expect)), silent=TRUE)
      if(inherits(exp2, "try-error")) stop(fxNa,"Argument 'expect=",pasteC(utils::head(expect)),"' contains no valid numeric content !") else {
        if(!silent) message(fxNa,"Trying to convert from 'expect=",pasteC(utils::head(expect)),"' to numeric  ",pasteC(utils::head(exp2))) }
    }
  } 
  if(doSelect) {  
    if(length(rowNa) >1) { message(fxNa," 'rowNa' should have length of 1 (but is ",length(rowNa),"), truncating ...")
      rowNa <- rowNa[1] }
    hasAnn <- subPat <- FALSE
    if(length(startLev) >0) if(!is.numeric(startLev)) stop("Argument 'startLev' must be integer")
    if(debug) {message(fxNa,"lMS0"); lMS0 <- list(rowNa=rowNa,dat=dat,logExpect=logExpect,startLev=startLev,lisNa=lisNa,hasAnn=hasAnn)}
    if(is.list(dat)) {
      ## 'dat' is list
      if(lisNa[2] %in% names(lisNa) && lisNa[2] %in% names(dat)) if(annoColNa[2] %in% colnames(dat[[lisNa[2]]])) hasAnn <- TRUE
      chLi <- lisNa %in% names(dat)
      if(all(!chLi)) stop("None of the list-elements given in 'lisNa' were found in 'dat' !")
      if(any(!chLi)) message(fxNa,"Trouble ahead : The elements ",pasteC(lisNa[which(!chLi)], quoteC="'")," not found !!")
      if(debug) {message(fxNa,"lMS1a"); lMS1a <- list(rowNa=rowNa,dat=dat,logExpect=logExpect,startLev=startLev,lisNa=lisNa,doSelect=doSelect,hasAnn=hasAnn, chLi=chLi)}
      if(is.numeric(rowNa) && grepl("^[[:digit:]]+$", rowNa)) {                     # is index
        linNo <- as.integer(rowNa)
        if(linNo >0 && linNo <= nrow(dat[[lisNa[2]]])) rowNa <- dat[[lisNa[2]]][rowNa,annoColNa[1]] else { doSelect <- FALSE
          if(!silent) message(fxNa,"Invalid line number (",linNo,")") }
      } else {
        linNo <- if(hasAnn) which(dat[[lisNa[2]]][,annoColNa[1]] == rowNa) else which(rownames(dat[[lisNa[1]]]) ==rowNa)  
        if(length(linNo) >1) { if(!silent) message(fxNa,"Name specified in argument 'lisNa' not unique, using first")
          linNo <- linNo[1] } else if(length(linNo) <1) {  # try to find from rownames
            linNo <- which(rownames(dat[[lisNa[2]]]) %in% rowNa)
            if(length(linNo) <1 && all(grepl("^[[:digit:]]+$", rowNa))) linNo <- as.numeric(rowNa)
            if(length(linNo) <1) { doSelect <- FALSE; if(!silent) message(fxNa,"Unable to find row corresponding to ",rowNa)}
          } 
        }
      dat1 <- if(doSelect) dat[[lisNa[3]]][linNo,] else NULL                       # get imputed data
    } else { 
      ## simple matrix data
      linNo <- if(is.numeric(rowNa) && grepl("^[[:digit:]]+$", rowNa)) as.integer(rowNa) else which(rownames(dat) %in% rowNa)
      if(length(linNo) !=1 && !silent) message(fxNa," Note : ",length(linNo)," lines of 'dat' matched to '",rowNa,"' ! (can use only 1st)")
      if(length(linNo) >1) linNo <- linNo[1] else if(length(linNo) <1) {doSelect <- FALSE; if(!silent) message(fxNa,"Could NOT match ",rowNa," to rownames of ",argNa[2])}
      dat1 <- if(doSelect) dat[linNo,] else NULL}
    if(debug) {message(fxNa,"lMS1"); lMS1 <- list(rowNa=rowNa,dat=dat,logExpect=logExpect,startLev=startLev,lisNa=lisNa,doSelect=doSelect, dat1=dat1)}
      
    ## which starting levels to test
    if(doSelect) {
      startLev <- if(length(startLev) <1) 1:floor(length(unique(expect))/2) else as.integer(startLev)
      expect0 <- expect
      if(!is.numeric(expect)) {
        subPat <- "[[:alpha:]]*[[:punct:]]*[[:alpha:]]*"
        subPat <- paste0(c("^",""),subPat,c("","$"))   
        expect <- try(as.numeric( sub(subPat[2], "", sub(subPat[1], "", as.character(expect))))) } 
      if(inherits(expect, "try-error")) stop(fxNa,"Problem extracting the numeric content of 'expect': ",pasteC(expect0,quoteC="'"))  
      if(!is.numeric(expect)) {
        expect <- as.numeric(as.factor(as.character(expect)))
        if(!silent) message(fxNa,"Note: 'expect' is not numeric, transforming to integers")
      }
      if(debug) {message(fxNa,"lMS2"); lMS2 <- list(rowNa=rowNa,dat=dat,logExpect=logExpect,startLev=startLev,lisNa=lisNa, dat1=dat1,expect0=expect0,expect=expect)}
    
      ## MAIN MODEL
      dat1 <- data.frame(conc=if(logExpect) log2(expect) else expect, quant=dat1, concL=expect)            # omics quantitation data is already log2
      lm0 <- lapply(startLev, function(x) { lmX <- if(!isFALSE(suppressWarnings)) suppressWarnings(
        try(stats::lm(quant ~ conc, data=dat1[which(expect >= sort(unique(expect))[x]),]))) else try(stats::lm(quant ~ conc, data=dat1[which(expect >= sort(unique(expect))[x]),])); lmX })
      slopeAndP <- sapply(lm0, function(x)  z <- stats::coef(summary(x))[2,c("Estimate","Pr(>|t|)")])
      bestReg <- which.min(slopeAndP[2,])
      if(!silent) message(fxNa," best slope pVal starting at level no ",bestReg)
      if(debug) {message(fxNa,"lMS3"); lMS3 <- list(rowNa=rowNa,dat=dat,logExpect=logExpect,startLev=startLev,lisNa=lisNa, dat1=dat1,expect0=expect0,expect=expect,lm0=lm0,slopeAndP=slopeAndP,bestReg=bestReg)}
      
      ## PLOT
      if(plotGraph) {
        levExp <- sort(unique(expect))
        usedP <- expect >= levExp[bestReg] 
        useCol <- rep(quantCol[1], nrow(dat1))                    # initialize as used
        pch1 <- rep(pch[1], nrow(dat1))                           # initialize as used
        if(any(!usedP)) { useCol[which(!usedP)] <- quantCol[2]    # color used for not-used  
          pch1[which(!usedP)] <- pch[2] }                         # symbol used for not-used  
        
        chNa <- is.list(dat) && lisNa[1] %in% names(dat)
        if(any(chNa)) chNa <- is.na(dat[[lisNa[1]]][linNo,])
        hasNa <- any(chNa)
        legCol <- quantCol[if(hasNa) c(1:3,3) else 1:2]
        legPch <- pch[if(hasNa) c(1,2,1,2) else 1:2]
        if(hasNa) {useCol[which(chNa)] <- quantCol[3]} else {legLab <- legLab[1:2]}
        if(length(tit) <1) { tit <- if(hasAnn) dat[[lisNa[2]]][linNo, annoColNa[2]] else paste0(rowNa," (from ",argNa[2],")")}
        if(length(yLab) <1) yLab <- "measured"
        if(length(xLab) <1) xLab <- "expected"
        if(logExpect) xLab <- paste0(xLab," (log-scale)")
        ## main plot
        chG <- try(graphics::plot(quant ~ conc, data=dat1, col=useCol, main=tit,ylab=yLab, xlab=xLab, las=1, pch=pch1, cex.lab=cexLab, col.axis="white", cex.axis=0.3,tck=0), silent=TRUE)
        if(inherits(chG, "try-error")) message(fxNa,"UNABLE to draw figure !!") else {       
          graphics::mtext(paste0("best regr starting at level ",bestReg," (ie ",sort(unique(expect))[bestReg],"),  slope=",signif(slopeAndP[1,bestReg],3),
            ",  p=",signif(slopeAndP[2,bestReg],2)), cex=cexSub, col=quantCol[1])
          ## use crt= for rotating text on bottom ? ... crt works only for text()
          graphics::mtext(at=(unique(dat1[,1])), signif(if(logExpect) 2^(unique(dat1[,1])) else unique(dat1[,1]),3), side=1, las=xLabLas,col=1,cex=cexXAxis)    # set las to vertical ?
          graphics::abline(lm0[[bestReg]], lty=2, col=quantCol[1])
          graphics::axis(side=2, las=1, col=1, tick=TRUE, cex.axis=cexYAxis)                                         # left axis
          ptBg <- quantCol
          chPch <- pch %in% c(21:25)
          if(any(chPch)) { quantCol[which(chPch)] <- grDevices::rgb(0.2,0.2,0.2,0.4) }
          if(requireNamespace("wrGraph", quietly=TRUE)) {
            legLoc <- try(wrGraph::checkForLegLoc(dat1, sampleGrp=legLab, showLegend=FALSE)$loc, silent=TRUE)
          } else {
            if(!silent) message(fxNa,"NOTE: Package 'wrGraph' not installed from CRAN for searching optimal placement of legend  (setting to default 'bottomright')")
            legLoc <- "bottomright"
          } 
          if(inherits(legLoc, "try-error")) { legLoc <- "bottomright"
             message(fxNa,"Did not succeed in determining optimal legend location")}
          tmp <- try(graphics::legend(legLoc, legLab, pch=legPch, col=legCol, text.col=legCol, pt.bg=ptBg, cex=cexLeg, xjust=0.5,yjust=0.5), silent=TRUE)        # as points
          if("try-error" %in% class(tmp)) message(fxNa,"NOTE : Unable to add legend !  ",as.character(tmp))
        }  
      }
      list(coef=stats::coef(summary(lm0[[bestReg]])), name=rowNa, startLev=bestReg) } } }
     

Try the wrMisc package in your browser

Any scripts or data that you put into this service are public.

wrMisc documentation built on March 9, 2026, 5:07 p.m.