# All code that compute or plot positional entropy
# Shoud later be split into separate files?
#' Given data at a single point of a single patient, compute the entropy at
#' each position
#'
#' Defaults to Shannon entropy. If other entropies are required, contact the
#' package maintainer. (Trivial expansion)
#'
#' @param seq_data The filtered sequence data whose positional entropy
#' scores must be computed.
#' @export
compute_pentropy <- function(seq_data){
consensus <- consensusMatrix(seq_data)
letter_frequencies <- apply(consensus, 1, sum)
letters_that_occur <- names(letter_frequencies[letter_frequencies>0])
consensus <- consensus[attr(consensus, 'dimnames')[[1]] %in% letters_that_occur, ]
pentropy <- data.frame(pos = 1:ncol(consensus),
variable = 'entropy',
value = apply(consensus, 2, entropy))
return(pentropy)
}
#' Given data at a single point of a single patient, compute the entropy at
#' each position
#'
#' Defaults to Shannon entropy. If other entropies are required, contact the
#' package maintainer. (Trivial expansion)
#'
#' TODO: Not so trivial, expand this function to not only work over time, but
#' over arbitrary splits.
#'
#' @param seq_data The unfiltered sequence data whose positional entropy
#' scores must be computed.
#' @export
compute_all_pentropy <- function(seq_data){
all_pos_data <- data.frame(time_point = character(0),
pos = integer(0),
variable = character(0),
value = numeric(0))
time_points <- get_unique_points_of('time', seq_data, indx = 2)
for (time_point in time_points){
seq_at_time_data <- get_data_of('timepoint', seq_data, time_point)
pentropy <- compute_pentropy(seq_at_time_data)
all_pos_data <- rbind(all_pos_data,
cbind(data.frame(time_point = time_point),
pentropy))
}
return(all_pos_data)
}
#' Aggregates the data over the points (usually time points) computing various
#' metrics of the values.
#'
#' @param all_pos_data The position data for all the points
#' @param variable A character vector of the names of the variables to compute
#' metrics on.
#' @export
aggregate_over_points <- function(all_pos_data, variables = 'entropy'){
aggregated_metrics <- data.frame(pos = numeric(0),
variable = character(0),
value = numeric(0))
for (the_variable in variables){
variable_data <- subset(all_pos_data, variable == the_variable)
tmp <- ddply(variable_data,
.(pos),
summarize,
n = length(value),
max = max(value),
min = min(value),
mean = mean(value),
median = median(value),
sd = sd(value))
tmp <- melt(tmp, id.vars = 'pos')
tmp$variable <- paste0(tmp$variable, '_', the_variable)
aggregated_metrics <- rbind(aggregated_metrics, tmp)
}
return(aggregated_metrics)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.