#' Make chemical table
#'
#' Summarize information about each chemical, including retention time and frequency of occurence by groups. Prerequisite for the filter* functions.
#'
#' Generates chemtable, a data frame of the compounds, with the following columns:
#' \describe{
#' \item{compound}{compound name}
#' \item{RT}{mean RT}
#' \item{RT.var}{variance of RT}
#' \item{match}{mean match}
#' \item{match.var}{variance of match}
#' \item{max.floral}{maximum area in floral samples}
#' \item{mean.floral}{mean area in floral samples, including zeros}
#' \item{mean.nonzero.floral}{mean area in floral samples, excluding zeros}
#' \item{median.floral}{median area in floral samples, including zeros}
#' \item{count.X}{multiple columns, number of occurrences within types and treatment groups}
#' \item{freq.X}{multiple columns, proportion of occurrences within types and treatment groups}
#' }
#' @param gcdata peaks integrated from GC data, either in long format (peaks in rows with columns for area, match score, and retention time, as generated by `load_longdata()`) or if detailed peak information is not available, in wide format (table of peak areas with samples in rows and compounds in columns, as generated by `make_sampletable()`).
#' @param metadata the sample metadata, in the same row order as gcdata if datatype is 'wide'
#' @param datatype the format of gcdata, either 'long' (the default) or 'wide'
#' @return chemtable, a data frame of the compound
#' @examples
#' data(GCMSfloral)
#' \dontrun{chemtable <- make_chemtable(longdata, metadata)}
#' @export
make_chemtable <- function(gcdata, metadata, datatype = "long") {
if(datatype == "long") {
longdata <- gcdata
sampletable <- bouquet::make_sampletable(longdata, metadata)
chemtable <- data.frame(name=levels(longdata$name))
} else if(datatype == "wide") {
sampletable <- gcdata
chemtable <- data.frame(name=colnames(sampletable))
} else {
stop("datatype must be /'long/' or /'wide/'")
}
chemtable <- within(chemtable, {
if(datatype == "long") {
RT <- sapply(name, function(x) {median(longdata$RT[longdata$name==x])})
RT.var <- sapply(name, function(x) {var(longdata$RT[longdata$name==x])})
match <- sapply(name, function(x) {median(longdata$match[longdata$name==x])})
match.var <- sapply(name, function(x) {var(longdata$match[longdata$name==x])})
}
max.floral <- sapply(sampletable[metadata$type=="floral",], max)
max.ambient <- sapply(sampletable[metadata$type=="ambient",],max)
mean.floral <- sapply(sampletable[metadata$type=="floral",], mean)
mean.ambient <- sapply(sampletable[metadata$type=="ambient",],mean)
mean.nonzero.floral <- sapply(sampletable[metadata$type=="floral",], function(x){mean(x[x>0])})
mean.nonzero.ambient <-sapply(sampletable[metadata$type=="ambient",], function(x){mean(x[x>0])})
median.floral <- sapply(sampletable[metadata$type=="floral",], median)
median.ambient <- sapply(sampletable[metadata$type=="ambient",],median)
})
add_counts_freqs <- function(chemtable, sampletable, groups) {
for(g in levels(groups)[levels(groups)!=""]) { #"" are probably the ambients and blanks
chemtable[,paste0("count.",g)] <- sapply(na.omit(sampletable[groups==g,]), function(x) sum(x>0))
chemtable[,paste0("freq.",g)] <- sapply(na.omit(sampletable[groups==g,]), function(x) sum(x>0) / length(x))
}
return(chemtable)
}
chemtable <- add_counts_freqs(chemtable, sampletable, metadata$type)
for(grouping in attr(metadata, "group")) {
chemtable <- add_counts_freqs(chemtable, sampletable, metadata[,grouping])
}
return(chemtable)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.