println <- function(...) {
cat(date(), '>', ..., '\n')
flush.console()
}
get_cores <- function(...) ifelse(interactive(), 1, future::availableCores())
mclapply2 <- function(X, FUN, ...,
mc.preschedule = TRUE, mc.set.seed = TRUE,
mc.silent = FALSE, mc.cores = get_cores(),
mc.cleanup = TRUE, mc.allow.recursive = FALSE) {
original_class <- class(X)
if ( is.data.frame(X) ) new_x <- FUN(X, ...)
else new_x <- parallel::mclapply(X, FUN, ...,
mc.preschedule = mc.preschedule,
mc.set.seed = mc.set.seed,
mc.silent = mc.silent, mc.cores = mc.cores,
mc.cleanup = mc.cleanup,
mc.allow.recursive = mc.allow.recursive)
class(new_x) <- original_class
return ( new_x )
}
get_file_inputs <- function(command_args, ...) {
inputs <- list(data=command_args[6], annotation=command_args[7])
stopifnot(! is.na(inputs$data) | ! file.exists(inputs$data) | ! is.na(inputs$data))
return ( inputs )
}
get_controls <- function(command_args, ...) {
controls <- list(positive=command_args[9], negative=command_args[8])
if ( all(!is.na(controls)) ) println('Controls are', controls$positive, '(positive) and',
controls$negative, '(negative)')
return ( controls )
}
check_headers <- function(dataframe,
essential_headers=c('compound', 'concentration', 'strain', 'plate_name', 'count'))
stopifnot(all(essential_headers %in% names(dataframe)))
pyjoin <- function(x, char) paste(x, collapse=char)
splitter <- function(x, char, n=NULL, rejoin_char=char) {
vapply(x, function(y) {
split_string <- strsplit(y, char, fixed=TRUE)[[1]]
if ( is.null(n) ) n_ <- seq_len(length(split_string)) else n_ <- n
pyjoin(split_string[n_], char=rejoin_char)
}, 'a')
}
multigrepl <- function(patterns, targets) apply(vapply(patterns, grepl, grepl(patterns[1], targets), targets), 1, any)
capitalize <- function(x) {
initials <- substr(x, 1, 1)
other_chars <- substr(x, ifelse(nchar(x) > 1, 2, 0), ifelse(nchar(x) > 1, nchar(x), 0))
return ( paste0(toupper(initials), other_chars) )
}
#' @importFrom magrittr %>%
column2vector <- function (x, column) x %>% dplyr::ungroup() %>% dplyr::select_(column) %>% unlist()
#' @importFrom magrittr %>%
get_unique_values <- function(x, column, output.class=NULL) {
x2 <- column2vector(x, column)
if (inherits(x2, 'factor') ) x2 <- x2 %>% droplevels() %>% levels()
else x2 <- unique(x2)
if ( !is.numeric(x2) & !is.null(output.class) & is.function(output.class) ) x2 <- output.class(x2)
return ( x2 )
}
#' @importFrom magrittr %>%
get_compound_core <- function(x, column='compound') {
library('dplyr')
new_df <- x %>%
dplyr::ungroup() %>%
dplyr::select_(column) %>%
dplyr::distinct() %>%
dplyr::arrange() %>%
dplyr::mutate_(compound_stem=paste0('sapply(as.character(', column, '), function(x) strsplit(x, "-", fixed=TRUE)[[1]][2])'))
return ( inner_join(x, new_df) )
}
#' @importFrom magrittr %>%
make_columns_factors <- function(x, columns) {
mutate_string <- paste0('as.factor(', columns, ')') %>% setNames(columns)
x <- x %>% dplyr::mutate_(.dots=mutate_string)
return ( x )
}
check_same_levels <- function(df1, df2, column) {
levels_present1 <- get_unique_values(df1, column)
levels_present2 <- get_unique_values(df2, column)
return(length(setdiff(levels_present1, levels_present2)) > 0)
}
# statistical functions
optimizer <- function(f, interval, plot=NULL, ..., lower = min(interval),
upper = max(interval), maximum = FALSE,
tol = .Machine$double.eps^0.25, max_recursion=100, recursion_no=0) {
optimized <- optimize(f=f, interval=interval, ..., lower=lower,
upper=upper, maximum=maximum, tol=tol)
if ( is.finite(optimized$objective) & f(upper + 0.1) > optimized$objective | recursion_no > max_recursion ) {
return ( optimized )
} else {
println('Shifting window from',
pyjoin(signif(interval, 2), char=', '),
'to',
pyjoin(signif(interval, 2) + 1, char=', '),
'; current minimum is',
signif(optimized$objective, 2), 'at',
signif(optimized$minimum, 2))
optimizer(f=f, interval=interval + 1, plot=plot, ..., maximum=maximum, tol=tol,
max_recursion=max_recursion, recursion_no=recursion_no + 1)
}
}
cr_adjustment <- function(means, dispersion, model_matrix) {
weights_matrix <- diag(1 / ((1 / means) + dispersion))
fisher_info <- crossprod(model_matrix, weights_matrix) %*% model_matrix
cr_penalty <- 0.5 * determinant(fisher_info, logarithm=TRUE)$modulus[1]
return(cr_penalty)
}
nb_dispersion_log_likelihood <- function(observed_data, means, dispersion)
sum(log(mapply(dnbinom, x=observed_data, mu=means, size=1 / dispersion)))
cr_adjusted_log_likelihood <- function(observed_data, means, dispersion, model_matrix)
nb_dispersion_log_likelihood(observed_data,
means,
dispersion) - cr_adjustment(means,
dispersion,
model_matrix)
#' @importFrom errR %except%
estimate_nb_dispersion_mle <- function(model, data, initial_guess,
log_likelihood_function=cr_adjusted_log_likelihood,
bounds=c(-3, 3)) {
model_string <- as.character(model)
model_glm <- glm(model, MASS::negative.binomial(1 / initial_guess), data) %except% NULL
if ( is.null(model_glm) ) {
println('Intial dispersion model didn\'t fit:', model_string)
print(initial_guess)
}
response_variable <- names(model_glm)[1]
response_vector <- model_glm$model[ , 1]
predicted_mus <- model_glm$fitted.values
model_matrix <- model.matrix(model_glm)
objective_f <- function(x)
-log_likelihood_function(observed_data=response_vector,
means=predicted_mus, dispersion=x, model_matrix=model_matrix)
println('Optimizing log likelihood function')
#optimized <- optimizer(objective_f, bounds + log(initial_guess))
optimized <- nlm(objective_f, initial_guess)
#print(optimized)
optimized_dispersion <- optimized$estimate
return ( optimized_dispersion )
}
hijack <- function (f, ...) {
.f <- f
args <- list(...)
.f.formals <- formals(.f)
new.formals <- names(.f.formals) %>% lapply(function(x) {
if (x %in% names(args)) return(args[[x]]) else return(.f.formals[[x]])
}) %>% setNames(names(.f.formals))
#print(new.formals)
formals(.f) <- new.formals
return ( .f )
}
trim_glm <- function(x, ...) {
x$data <- NULL
x$y <- NULL
#x$linear.predictors <- NULL
x$weights <- NULL
x$fitted.values <- NULL
x$model <- NULL
x$prior.weights <- NULL
x$residuals <- NULL
x$effects <- NULL
#x$qr$qr <- NULL
return ( x )
}
#' @importFrom magrittr %>%
calculate_roc_stats <- function(x) {
x <- x %>%
dplyr::summarize(n_positive=sum(condition_positive, na.rm=TRUE),
n_negative=sum(condition_negative, na.rm=TRUE),
n_tests=n(),
prevalence=mean(condition_positive, na.rm=TRUE),
n_called_positive=sum(called_positive, na.rm=TRUE),
n_called_negative=sum(called_negative, na.rm=TRUE),
n_false_positive=sum(false_positive, na.rm=TRUE),
n_false_negative=sum(false_negative, na.rm=TRUE),
n_true_positive=sum(true_positive, na.rm=TRUE),
n_true_negative=sum(true_negative, na.rm=TRUE),
true_positive_rate=sum(true_positive, na.rm=TRUE) / n_positive,
false_positive_rate=sum(false_positive, na.rm=TRUE) / n_negative,
true_negative_rate=sum(true_negative, na.rm=TRUE) / n_negative,
false_negative_rate=sum(false_negative, na.rm=TRUE) / n_positive,
accuracy=mean(correct, na.rm=TRUE),
false_discovery_rate=sum(false_positive, na.rm=TRUE) / n_called_positive,
positive_predictive_value=sum(true_positive, na.rm=TRUE) / n_called_positive,
f1_score=2 / ((1 / true_positive_rate) + (1 / positive_predictive_value))) %>%
mutate(false_discovery_rate=ifelse(is.na(false_discovery_rate), 0, false_discovery_rate),
positive_predictive_value=ifelse(is.na(positive_predictive_value), 0, positive_predictive_value),
f1_score=ifelse(is.na(f1_score), 0, f1_score))
return ( x )
}
#' @importFrom magrittr %>%
calculate_roc_table <- function(x, positive_compound, cutoff_column, cutoffs, prevalence) {
n_positives <- nrow(dplyr::filter(x, grepl(positive_compound, compound)))
n_negatives <- nrow(dplyr::filter(x, !grepl(positive_compound, compound)))
n_to_sample <- prevalence * n_negatives
sampled_positives <- x %>%
dplyr::filter(grepl(positive_compound, compound)) %>%
dplyr::ungroup() %>%
dplyr::sample_n(n_to_sample)
x2 <- x %>%
dplyr::filter(!grepl(positive_compound, compound))
dplyr::bind_rows(sampled_positives)
x <- x %>%
tidyr::crossing(cutoff=cutoffs) %>%
dplyr::mutate(condition_positive=grepl(positive_compound, compound),
condition_negative=! condition_positive) %>%
dplyr::mutate_(.dots=c(called_positive=paste(cutoff_column, '<= cutoff'))) %>%
dplyr::mutate(called_negative=!called_positive,
true_positive=condition_positive & called_positive,
false_positive=condition_negative & called_positive,
true_negative=condition_negative & called_negative,
false_negative=condition_positive & called_negative,
correct=true_positive | true_negative)
return ( x )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.