R/geneDrugSensitivity.R

Defines functions geneRadSensitivity

#' Calcualte the gene radiation sensitivity score
#'
#' @examples
#'
#'
#' @param x \code{numeric} vector of gene expression values
#' @param type \code{factor} vector specifying the cell lines or type types
#' @param batch \code{factor} vector of factors specifying the batch
#' @param drugpheno \code{numeric} vector of drug sensitivity values (e.g.,
#'   IC50 or AUC)
#' @param model Should the full linear model be returned? Default set to FALSE
#' @param standardize \code{character} One of 'SD', 'rescale' or 'none'
#' @param verbose \code{logical} Should informative messages be written to
#'   the console?
#'
#' @return A \code{numeric} vector reporting the effect size (estimate of the
#'   coefficient of drug concentration), standard error (se), sample size (n),
#'   t statistic, and F statistics and its corresponding p-value
#'
#' @importFrom stats sd complete.cases lm glm anova pf formula var
#' @importFrom scales rescale
#'
#' @noRd
geneRadSensitivity <- function(x,
                               type,
                               batch,
                               drugpheno,
                               model=FALSE,
                               standardize=c("SD", "rescale", "none"),
                               verbose=FALSE)
{
  standardize <- match.arg(standardize)

  colnames(drugpheno) <- paste("drugpheno", seq_len(ncol(drugpheno)), sep=".")

  drugpheno <- data.frame(vapply(drugpheno, function(x) {
    if (!is.factor(x)) {
      x[is.infinite(x)] <- NA
    }
    return(list(x))
  }, list(1)), check.names=FALSE)

  ccix <- complete.cases(x, type, batch, drugpheno)
  nn <- sum(ccix)

  if(length(table(drugpheno)) > 2){
     if(ncol(drugpheno)>1){
      ##### FIX NAMES!!!
      rest <- lapply(seq_len(ncol(drugpheno)), function(i){

        est <- paste("estimate", i, sep=".")
        se <-  paste("se", i, sep=".")
        tstat <- paste("tstat", i, sep=".")

        rest <- rep(NA, 3)
        names(rest) <- c(est, se, tstat)
        return(rest)

      })
      rest <- do.call(c, rest)
      rest <- c(rest, n=nn, "fstat"=NA, "pvalue"=NA)
    } else {
      rest <- c("estimate"=NA, "se"=NA, "n"=nn, "tstat"=NA, "fstat"=NA, "pvalue"=NA, "df"=NA)
    }
  } else {
    rest <- c("estimate"=NA, "se"=NA, "n"=nn, "pvalue"=NA)
  }

  if(nn < 3 || var(x[ccix], na.rm=TRUE) == 0) {
    ## not enough samples with complete information or no variation in gene expression
    return(rest)
  }

  ## standardized coefficient in linear model
  if(length(table(drugpheno)) > 2 & standardize!= "none") {
    switch(standardize,
      "SD" = drugpheno <- apply(drugpheno, 2, function(x){
      return(x[ccix]/sd(as.numeric(x[ccix])))}) ,
      "rescale" = drugpheno <- apply(drugpheno, 2, function(x){
      return(rescale(as.numeric(x[ccix]), q=0.05, na.rm=TRUE))    })
      )

  }else{
    drugpheno <- drugpheno[ccix,,drop=FALSE]
  }
  if(length(table(x)) > 2  & standardize!= "none"){
    switch(standardize,
      "SD" = xx <- x[ccix]/sd(as.numeric(x[ccix])) ,
      "rescale" = xx <- rescale(as.numeric(x[ccix]), q=0.05, na.rm=TRUE)
      )
  }else{
    xx <- x[ccix]
  }
  if(ncol(drugpheno)>1){
    ff0 <- paste("cbind(", paste(paste("drugpheno", seq_len(ncol(drugpheno)), sep="."), collapse=","), ")", sep="")
  } else {
    ff0 <- sprintf("drugpheno.1")
  }

  dd <- data.frame(drugpheno, "x"=xx)

  ## control for tissue type
  if(length(sort(unique(type))) > 1) {
    dd <- cbind(dd, type=type[ccix])
  }
  ## control for batch
  if(length(sort(unique(batch))) > 1) {
        dd <- cbind(dd, batch=batch[ccix])
  }
if(any(unlist(lapply(drugpheno,is.factor)))){

rr0 <- tryCatch(try(glm(formula(drugpheno.1 ~ . - x), data=dd, model=FALSE, x=FALSE, y=FALSE, family="binomial")),
    warning=function(w) {
      if(verbose) {
        ww <- "Null model did not convrge"
        message(ww)
        if("type" %in% colnames(dd)) {
          tt <- table(dd[,"type"])
          message(tt)
        }
      }
    })
  rr1 <- tryCatch(try(glm(formula(drugpheno.1 ~ .), data=dd, model=FALSE, x=FALSE, y=FALSE, family="binomial")),
    warning=function(w) {
      if(verbose) {
        ww <- "Model did not converge"
        tt <- table(dd[,"drugpheno.1"])
        message(ww)
        message(tt)
      }
      return(ww)
    })


} else{

rr0 <- tryCatch(try(lm(formula(paste(ff0, "~ . -x", sep=" ")), data=dd)),
    warning=function(w) {
      if(verbose) {
        ww <- "Null model did not converge"
        message(ww)
        if("type" %in% colnames(dd)) {
          tt <- table(dd[,"type"])
          message(tt)
        }
      }
    })
  rr1 <- tryCatch(try(lm(formula(paste(ff0, "~ . ", sep=" ")), data=dd)),
    warning=function(w) {
      if(verbose) {
        ww <- "Model did not converge"
        tt <- table(dd[,"drugpheno.1"])
        message(ww)
        message(tt)
      }
      return(ww)
    })


}


  if (!is(rr0, "try-error") && !is(rr1, "try-error") & !is(rr0, "character") && !is(rr1, "character")) {
    rr <- summary(rr1)

    if(any(unlist(lapply(drugpheno,is.factor)))){
      rrc <- stats::anova(rr0, rr1, test="Chisq")
      rest <- c("estimate"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "Estimate"], "se"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "Std. Error"], "n"=nn, "pvalue"=rrc$'Pr(>Chi)'[2])
      names(rest) <- c("estimate", "se", "n", "pvalue")

    } else {
      if(ncol(drugpheno)>1){
        rrc <- summary(stats::manova(rr1))
        rest <- lapply(seq_len(ncol(drugpheno)), function(i) {
          est <- paste("estimate", i, sep=".")
          se <-  paste("se", i, sep=".")
          tstat <- paste("tstat", i, sep=".")
          rest <- c(rr[[i]]$coefficients[grep("^x", rownames(rr[[i]]$coefficients)), "Estimate"], rr[[i]]$coefficients[grep("^x", rownames(rr[[i]]$coefficients)), "Std. Error"], rr[[i]]$coefficients[grep("^x", rownames(rr[[i]]$coefficients)), "t value"])
          names(rest) <- c(est, se, tstat)
          return(rest)
        })
        rest <- do.call(c, rest)
        rest <- c(rest,"n"=nn, "fstat"=rrc$stats[grep("^x", rownames(rrc$stats)), "approx F"], "pvalue"=rrc$stats[grep("^x", rownames(rrc$stats)), "Pr(>F)"])
      } else {
        rrc <- stats::anova(rr0, rr1, test = "F")
        rest <- c("estimate"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "Estimate"], "se"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "Std. Error"],"n"=nn, "tstat"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "t value"], "fstat"=rrc$F[2], "pvalue"=rrc$'Pr(>F)'[2], "df"=rr1$df.residual)
        names(rest) <- c("estimate", "se", "n", "tstat", "fstat", "pvalue", "df")
      }
    }

    if(model) { rest <- list("stats"=rest, "model"=rr1) }
  }
  return(rest)
}

Try the RadioGx package in your browser

Any scripts or data that you put into this service are public.

RadioGx documentation built on Nov. 8, 2020, 8:21 p.m.