#' identify_sensillum
#'
#' correlates the result from a SSR recording of several odorants against all
#' DoOR response profiles
#'
#' @param recording data frame, a data frame with the following columns
#' "odorants" containing InChIKeys of the tested odorrant, and one column
#' called "unit1" etc. for each unit, containing responses (or estimates)
#' scaled between 0 and 1 (see examples)
#' @param response_matrix DoOR response matrix, the data to compair against
#' @param odor_data data frame, contains the odorant information.
#' @param door_mappings the data frame containing the mapping information
#' @param tag character, the chemical identifier to use in plots, one of
#' \code{colnames(odor)}
#' @param min.cor numeric, a minimum correlation value, the function will check
#' wether there is a higher correlation for all units within a single
#' sensillum
#' @param nshow numeric, the number of plots to nshow, plot e.g. only the top 10
#' matches
#' @param plot logical, if TRUE returns the plot, else returns the data frame
#' with the correlations/distances
#' @param method character, the method for similarity calculations: correlation
#' ("cor") or Euclidean distances ("dist")
#' @param sub character, if you know the class of sensillum you were recording
#' from you can restrict the search to this subset here ("ab", "ac", "at",
#' "pb", "sac")
#' @param use character, the "use" option from the \code{cor} function, "all"
#' returns NA when pairs are incomplete, "na.or.complete" only uses complete
#' observations to calculate correlations; see \code{\link{cor}} for details
#' @param base_size numeric, the base font size of the ggplot plots
#'
#' @author Daniel Münch <\email{daniel.muench@@uni-konstanz.de}>
#' @aliases identifySensillum identify_sensillum
#'
#' @return eithe& Carolin G.(†27)r a plot (gtable) with responses sorted by
#' highest correlations or lowest distances, or a data frame containing all
#' calculated correlations or Euclidean distances
#' @export
#' @import DoOR.data
#' @importFrom stats cor na.omit dist cor.test
#'
#' @examples
#' # load data
#' library(DoOR.data)
#'
#' # create an example recording
#' recording <- data.frame(
#' odorants = c(trans_id(c("BEDN", "ETAS"), "Code"),
#' trans_id("carbon dioxide", "Name")),
#' unit1 = c(.9,.1,.1),
#' unit2 = c(0, .1, 1)
#' )
#'
#' # run the identification
#' identify_sensillum(recording)
#' identify_sensillum(recording, method = "dist", nshow = 5)
#'
identify_sensillum <- function(recording,
response_matrix = door_default_values("door_response_matrix"),
odor_data = door_default_values("odor"),
door_mappings = door_default_values("door_mappings"),
tag = "Name",
min.cor = .9,
nshow = 10,
method = "cor",
sub,
base_size = 12,
plot = TRUE,
use = "everything") {
if(plot == TRUE) {
if (!requireNamespace("ggplot2", quietly = TRUE))
stop(
"ggplot2 is required for plotting, please install via
install.packages('ggplot2')",
call. = FALSE
)
if (!requireNamespace("grid", quietly = TRUE))
stop("grid is required for plotting, please install via
install.packages('grid')",
call. = FALSE)
if (!requireNamespace("gridExtra", quietly = TRUE))
stop(
"gridExtra is required for plotting, please install via
install.packages('gridExtra')",
call. = FALSE
)
}
recording$odorants <- as.character(recording$odorants)
data <- response_matrix[recording$odorants,]
if(is.na(nshow))
nshow <- length(data)
if (!missing(sub)) {
subset <-
as.character(door_mappings$receptor[grep(paste0("^", sub, collapse = "|"),
door_mappings$sensillum)])
which(colnames(data) %in% subset)
data <- data[, which(colnames(data) %in% subset)]
}
if (method == "cor") {
# calulate correlations
units <- colnames(recording)[-1]
result <- data.frame(receptor = colnames(data))
result$sensillum <-
door_mappings$sensillum[match(result$receptor, door_mappings$receptor)]
result$OSN <-
door_mappings$OSN[match(result$receptor, door_mappings$receptor)]
for (i in seq_along(units)) {
corx <-
apply(data, 2, function(x)
cor(x, recording[, units[i]], use = use))
corx <- data.frame(receptor = names(corx), value = corx)
colnames(corx)[2] <- units[i]
result <- merge(result, corx)
}
# print results
cor.tmp <- droplevels(subset(result, sensillum != ""))
for (i in levels(cor.tmp$sensillum)) {
tmp <- subset(cor.tmp, sensillum == i)
if (all(apply(as.data.frame(tmp[, -c(1:3)]), 2 , function(x)
any(na.omit(x) > min.cor)))) {
message(
paste(
"found correlations above ",
min.cor,
" for all ",
length(units),
" units in: ",
i,
sep = ""
)
)
}
}
}
if (method == "cor.test") {
# calulate correlations
units <- colnames(recording)[-1]
result <- data.frame(receptor = colnames(data))
result$sensillum <-
door_mappings$sensillum[match(result$receptor, door_mappings$receptor)]
result$OSN <-
door_mappings$OSN[match(result$receptor, door_mappings$receptor)]
for (i in seq_along(units)) {
corx <- c()
px <- c()
for (j in colnames(data)) {
tmp <- try(cor.test(data[, j], recording[, units[i]]), silent = TRUE)
if (class(tmp) == "try-error") {
corx <- c(corx, NA)
px <- c(px, NA)
} else {
corx <- c(corx, tmp$estimate)
px <- c(px, tmp$p.value)
}
}
corx <-
data.frame(receptor = colnames(data),
value = corx,
p.value = px)
colnames(corx)[2] <- units[i]
colnames(corx)[3] <- paste0(units[i], ".pval")
result <- merge(result, corx)
}
# print results
cor.tmp <- droplevels(subset(result, sensillum != ""))
for (i in levels(cor.tmp$sensillum)) {
tmp <- subset(cor.tmp, sensillum == i)
if (all(apply(as.data.frame(tmp[, -c(1:3, 5)]), 2 , function(x)
any(na.omit(x) > min.cor)))) {
message(
paste(
"found correlations above ",
min.cor,
" for all ",
length(units),
" units in: ",
i,
sep = ""
)
)
}
}
}
if (method == "dist") {
# calc distances
units <- colnames(recording)[-1]
result <- rbind(t(recording[, -1, drop = FALSE]), t(data))
result <- as.matrix(dist(result))
result <-
result[(length(units) + 1):nrow(result), seq_along(units), drop = FALSE]
result <- data.frame(
receptor = rownames(result),
sensillum = door_mappings$sensillum[match(rownames(result),
door_mappings$receptor)],
OSN = door_mappings$OSN[match(rownames(result), door_mappings$receptor)],
result
)
}
# plots
# change odor identifier for plotting
if (tag != "InChIKey") {
recording$odorants <-
odor_data[match(recording$odorants, odor_data$InChIKey), tag]
rownames(data) <-
odor_data[match(rownames(data), odor_data$InChIKey), tag]
}
if (plot == TRUE) {
# plot data
data.melt <- door_melt(data)
plots <- list()
for (i in seq_along(units)) {
if (method %in% c("cor"))
cor.tmp <-
result[order(result[, units[i]], decreasing = TRUE), ][1:nshow,
c("receptor", units[i])]
if (method %in% c("cor.test")) {
cor.tmp <-
result[order(result[, units[i]], decreasing = TRUE), ][1:nshow,
c("receptor", units[i], paste0(units[i], ".pval"))]
colnames(cor.tmp)[3] <- "pval"
}
if (method == "dist")
cor.tmp <-
result[order(result[, units[i]], decreasing = FALSE), ][1:nshow,
c("receptor", units[i])]
colnames(cor.tmp)[2] <- "cor"
cor.tmp$OSN <-
door_mappings$OSN[match(cor.tmp$receptor, door_mappings$receptor)]
if (method == "cor.test") {
cor.tmp$label <-
paste0(
cor.tmp$OSN,
" (",
cor.tmp$receptor,
")",
"\n",
"cor: ",
round(cor.tmp$cor, 3),
"; p: ",
format(cor.tmp$pval, scientific = TRUE, digits = 2)
)
} else {
cor.tmp$label <-
paste(
cor.tmp$OSN,
" (",
cor.tmp$receptor,
")",
"\n",
method,
": ",
round(cor.tmp$cor, 3),
sep = ""
)
}
data.tmp <-
droplevels(subset(data.melt, dataset %in% cor.tmp$receptor))
data.tmp$label <-
cor.tmp$label[match(data.tmp$dataset, cor.tmp$receptor)]
data.tmp$label <-
factor(data.tmp$label, levels = cor.tmp$label)
p <-
ggplot2::ggplot(data.tmp,
ggplot2::aes(
x = odorant,
y = value,
fill = odorant,
color = odorant
)) +
ggplot2::geom_bar(position = "identity",
stat = "identity",
alpha = .6) +
ggplot2::facet_wrap( ~ label, nrow = 1) +
ggplot2::theme_minimal(base_size = base_size) +
ggplot2::theme(
panel.border = ggplot2::element_rect(fill = NA, color = "grey"),
axis.ticks.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
axis.title.x = ggplot2::element_blank()
)
assign(paste("p", i, sep = "."), p)
}
# plot recording
for (i in seq_along(units)) {
rec.tmp <-
data.frame(
odorant = recording$odorants,
value = recording[, units[i]],
unit = paste0(units[i], "\n")
)
r <-
ggplot2::ggplot(rec.tmp,
ggplot2::aes(
x = odorant,
y = value,
fill = odorant,
color = odorant
)) +
ggplot2::geom_bar(position = "identity",
stat = "identity",
alpha = .6) +
ggplot2::theme_minimal(base_size = base_size) +
ggplot2::theme(
panel.border = ggplot2::element_rect(fill = NA, color = "grey"),
axis.ticks.x = ggplot2::element_blank(),
axis.text.x = ggplot2::element_blank(),
axis.title.x = ggplot2::element_blank()
) +
#ggplot2::ggtitle(paste("unit", i, sep="")) +
ggplot2::facet_wrap( ~ unit) +
ggplot2::geom_text(
ggplot2::aes(y = .01, label = odorant),
angle = 90,
hjust = 0,
vjust = .5,
size = .3 * base_size,
color = "black"
)
assign(paste("r", i, sep = "."), r)
}
#build grobs
for (i in seq_along(units)) {
plots[[i]] <- gridExtra::arrangeGrob(
grobs = list(
get(paste("r", i, sep = ".")) +
ggplot2::theme(legend.position = "none"),
get(paste("p", i, sep = ".")) +
ggplot2::theme(legend.position = "none")
),
left = units[i],
nrow = 1,
widths = c(.2, .8)
)
}
if (method == "dist")
plots[["top"]] <- paste(nshow, "lowest euclidean distances")
if (method == "cor")
plots[["top"]] <- paste(nshow, "highest correlations")
plots[["ncol"]] <- 1
p <- do.call(gridExtra::arrangeGrob, plots)
grid::grid.newpage()
grid::grid.draw(p)
} else {
return(result)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.