########################
## Benjamin Haibe-Kains & Petr Smirnov
## October 23, 2013
########################
#' @importFrom stats sd
#' @importFrom stats complete.cases
#' @importFrom stats lm
#' @importFrom stats glm
#' @importFrom stats anova
#' @importFrom stats pf
#' @importFrom stats formula
#' @importFrom stats var
#' @importFrom scales rescale
geneDrugSensitivity <- function(x, type, batch, drugpheno, interaction.typexgene=FALSE, model=FALSE, standardize=c("SD", "rescale", "none"), verbose=FALSE) {
## input:
## x: numeric vector of gene expression values
## type: vector of factors specifying the cell lines or type types
## batch: vector of factors specifying the batch
## drugpheno: numeric vector of drug sensitivity values (e.g., IC50 or AUC)
## duration: numeric vector of experiment duration in hours
## interaction.typexgene: Should interaction between gene expression and cell/type type be computed? Default set to FALSE
## model: Should the full linear model be returned? Default set to FALSE
##
## output:
## vector reporting the effect size (estimateof the coefficient of drug concentration), standard error (se), sample size (n), t statistic, and F statistics and its corresponding p-value
standardize <- match.arg(standardize)
colnames(drugpheno) <- paste("drugpheno", seq_len(ncol(drugpheno)), sep=".")
drugpheno <- data.frame(vapply(drugpheno, function(x) {
if (!is.factor(x)) {
x[is.infinite(x)] <- NA
}
return(list(x))
}, USE.NAMES=FALSE), check.names=FALSE,
FUN.VALUE=list(1))
ccix <- complete.cases(x, type, batch, drugpheno)
nn <- sum(ccix)
if(length(table(drugpheno)) > 2){
if(ncol(drugpheno)>1){
##### FIX NAMES!!!
rest <- lapply(seq_len(ncol(drugpheno)), function(i){
est <- paste("estimate", i, sep=".")
se <- paste("se", i, sep=".")
tstat <- paste("tstat", i, sep=".")
rest <- rep(NA, 3)
names(rest) <- c(est, se, tstat)
return(rest)
})
rest <- do.call(c, rest)
rest <- c(rest, n=nn, "fstat"=NA, "pvalue"=NA)
} else {
rest <- c("estimate"=NA, "se"=NA, "n"=nn, "tstat"=NA, "fstat"=NA, "pvalue"=NA, "df"=NA)
}
} else {
rest <- c("estimate"=NA, "se"=NA, "n"=nn, "pvalue"=NA)
}
if(nn < 3 || var(x[ccix], na.rm=TRUE) == 0) {
## not enough samples with complete information or no variation in gene expression
return(rest)
}
## standardized coefficient in linear model
if(length(table(drugpheno)) > 2 & standardize!= "none") {
switch(standardize,
"SD" = drugpheno <- apply(drugpheno, 2, function(x){
return(x[ccix]/sd(as.numeric(x[ccix])))}) ,
"rescale" = drugpheno <- apply(drugpheno, 2, function(x){
return(rescale(as.numeric(x[ccix]), q=0.05, na.rm=TRUE)) })
)
}else{
drugpheno <- drugpheno[ccix,,drop=FALSE]
}
if(length(table(x)) > 2 & standardize!= "none"){
switch(standardize,
"SD" = xx <- x[ccix]/sd(as.numeric(x[ccix])) ,
"rescale" = xx <- rescale(as.numeric(x[ccix]), q=0.05, na.rm=TRUE)
)
}else{
xx <- x[ccix]
}
if(ncol(drugpheno)>1){
ff0 <- paste("cbind(", paste(paste("drugpheno", seq_len(ncol(drugpheno)), sep="."), collapse=","), ")", sep="")
} else {
ff0 <- sprintf("drugpheno.1")
}
dd <- data.frame(drugpheno, "x"=xx)
## control for tissue type
if(length(sort(unique(type))) > 1) {
dd <- cbind(dd, type=type[ccix])
}
## control for batch
if(length(sort(unique(batch))) > 1) {
dd <- cbind(dd, batch=batch[ccix])
}
if(any(unlist(lapply(drugpheno,is.factor)))){
rr0 <- tryCatch(try(glm(formula(drugpheno.1 ~ . - x), data=dd, model=FALSE, x=FALSE, y=FALSE, family="binomial")),
warning=function(w) {
if(verbose) {
ww <- "Null model did not convrge"
message(ww)
if("type" %in% colnames(dd)) {
tt <- table(dd[,"type"])
message(tt)
}
}
})
rr1 <- tryCatch(try(glm(formula(drugpheno.1 ~ .), data=dd, model=FALSE, x=FALSE, y=FALSE, family="binomial")),
warning=function(w) {
if(verbose) {
ww <- "Model did not converge"
tt <- table(dd[,"drugpheno.1"])
message(ww)
message(tt)
}
return(ww)
})
} else{
rr0 <- tryCatch(try(lm(formula(paste(ff0, "~ . -x", sep=" ")), data=dd)),
warning=function(w) {
if(verbose) {
ww <- "Null model did not converge"
message(ww)
if("type" %in% colnames(dd)) {
tt <- table(dd[,"type"])
message(tt)
}
}
})
rr1 <- tryCatch(try(lm(formula(paste(ff0, "~ . ", sep=" ")), data=dd)),
warning=function(w) {
if(verbose) {
ww <- "Model did not converge"
tt <- table(dd[,"drugpheno.1"])
message(ww)
message(tt)
}
return(ww)
})
}
## FIXME:: Do we really want a vectorized and here?
if (!is(rr0, "try-error") && !is(rr1, "try-error") &
!is(rr0, "character") && !is(rr1, "character")) {
rr <- summary(rr1)
if(any(unlist(lapply(drugpheno,is.factor)))){
rrc <- stats::anova(rr0, rr1, test="Chisq")
rest <- c("estimate"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "Estimate"], "se"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "Std. Error"], "n"=nn, "pvalue"=rrc$'Pr(>Chi)'[2])
names(rest) <- c("estimate", "se", "n", "pvalue")
} else {
if(ncol(drugpheno)>1){
rrc <- summary(stats::manova(rr1))
rest <- lapply(seq_len(ncol(drugpheno)), function(i) {
est <- paste("estimate", i, sep=".")
se <- paste("se", i, sep=".")
tstat <- paste("tstat", i, sep=".")
rest <- c(rr[[i]]$coefficients[grep("^x", rownames(rr[[i]]$coefficients)), "Estimate"], rr[[i]]$coefficients[grep("^x", rownames(rr[[i]]$coefficients)), "Std. Error"], rr[[i]]$coefficients[grep("^x", rownames(rr[[i]]$coefficients)), "t value"])
names(rest) <- c(est, se, tstat)
return(rest)
})
rest <- do.call(c, rest)
rest <- c(rest,"n"=nn, "fstat"=rrc$stats[grep("^x", rownames(rrc$stats)), "approx F"], "pvalue"=rrc$stats[grep("^x", rownames(rrc$stats)), "Pr(>F)"])
} else {
rrc <- stats::anova(rr0, rr1, test = "F")
rest <- c("estimate"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "Estimate"], "se"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "Std. Error"],"n"=nn, "tstat"=rr$coefficients[grep("^x", rownames(rr$coefficients)), "t value"], "fstat"=rrc$F[2], "pvalue"=rrc$'Pr(>F)'[2], "df"=rr1$df.residual)
names(rest) <- c("estimate", "se", "n", "tstat", "fstat", "pvalue", "df")
}
}
if(model) { rest <- list("stats"=rest, "model"=rr1) }
}
return(rest)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.