#' Make label data.frame for miami plot
#'
#' @param data A data.frame object. Required.
#' @param hits_label_col Either the name of the column(s), max = 2, to use for
#' automatically labeling n hits, determined using \code{top_n_hits}, or the
#' column where the values you provide in \code{hits_label} can be found.
#' Required.
#' @param hits_label A user-specified character vector of probes/genes/SNPs
#' to label. Defaults to NULL.
#' @param top_n_hits How many of the top hits do you want to label? Defaults to
#' 5. Set to NULL to turn off this filtering, but this is not recommended
#' because the plot can get cluttered easily.
#' @param logged_p The name of the column containing your logged p-value
#' information. Defaults to "logged_p," assuming data preparation with
#' \code{prep_miami_data}
#' @param rel_pos The name of the column containing the positon of each
#' SNP/probe relative to all other SNPs/probes in the genome. Defaults to
#' "rel_pos," assuming data preparation with \code{prep_miami_data}.
#' @examples
#' \dontrun{
#' # If you have already run prep_miami_data and want to label the top 5 hits
#' # with SNP and gene name:
#' upper_top5_labels <- make_miami_labels(data = plot_data$upper,
#' hits_label_col = c("SNP", "Gene"))
#'
#' # If you have already run prep_miami_data and want to label the top 10 hits
#' # from the AHRR, TBX15, and RARA genes:
#' upper_top10_labels <- make_miami_labels(data = plot_data$upper,
#' hits_label_col = "Gene",
#' hits_label =
#' c("AHRR", "TBX15", "RARA"),
#' top_n_hits = 10)
#'
#' # When labeling, the items in hits_label must all come from a single column.
#' # Specifying multiple columns will return an error.
#' ggmiami(data = gwas_results, split_by = "study", split_at = "A",
#' hits_label_col = c("chr", Gene"),
#' hits_label = c("1", AHRR", "2", "TBX15", "RARA"), top_n_hits = 10)
#'
#' # If you have not run prep_miami_data:
#' labels <- make_miami_labels(datat = df, hits_label_col = c("SNP", "pos"),
#' logged_p = "logp",
#' rel_pos = "relative_SNP_position")
#' }
#' @export
#' @return A data.frame with three columns: relative position, logged p-value,
#' and the label to add to the miami plot. Designed to be used with ggrepel.
#' @author Julie White
#' @references \url{https://github.com/juliedwhite/miamiplot}
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
make_miami_labels <- function(data, hits_label_col, hits_label = NULL,
top_n_hits = 5, logged_p = "logged_p",
rel_pos = "rel_pos") {
# If the user has not requested specific labels
if (is.null(hits_label)) {
# Check that the columns given in hits_label_col are named in the dataframe.
checkmate::assertNames(hits_label_col, type = "named",
subset.of = colnames(data))
# If the user has given one column name in "hits_label"
if (length(hits_label_col) == 1) {
label_df <- data %>%
dplyr::mutate(label = !!rlang::sym(hits_label_col)) %>%
dplyr::select(!!rlang::sym(rel_pos), !!rlang::sym(logged_p),
.data$label)
# Or, if the user has given two column names in "hits_label"
} else if (length(hits_label_col) == 2) {
label_df <- data %>%
dplyr::mutate(label = paste0(!!rlang::sym(hits_label_col[1]), "\n",
!!rlang::sym(hits_label_col[2]))) %>%
dplyr::select(!!rlang::sym(rel_pos), !!rlang::sym(logged_p),
.data$label)
}
# If instead the user has given a character vector of which SNPs/probes/genes
# to label
} else if (!is.null(hits_label)) {
# Check that the column given in hits_label_col is named in the dataframe
checkmate::assertNames(hits_label_col, type = "named",
subset.of = colnames(data))
# and has length = 1, because we can only match one column.
if (length(hits_label_col) > 1) {
stop("You have given two column names in addition to specifying ",
"hits_label. This function cannot currently match items in more ",
"one column, so please only provide one column where the items in ",
"hits_label can be found.")
}
# Check if the column contains special characters, which makes it difficult
# to identify and sort the labels.
if (!rlang::is_empty(grep("[[:punct:]]", data[, hits_label_col]))) {
message("The column {", hits_label_col, "} has special characters in it.",
" This function only respects exact matches, so your labels may ",
"be incorrect. Please consider removing the special characters ",
"from this column or providing exact matches.", appendLF = TRUE)
}
# Filter the data to get the position of these labels
label_df <- data %>%
dplyr::filter(!!rlang::sym(hits_label_col) %in% hits_label) %>%
dplyr::mutate(label = !!rlang::sym(hits_label_col)) %>%
dplyr::select(!!rlang::sym(rel_pos), !!rlang::sym(logged_p),
.data$label)
# If label_df is empty, it means we did not find any matches so throw a
# warning.
if (nrow(label_df) == 0) {
warning("I could not find any matches for {",
paste(hits_label, collapse = ", "), "} in {", hits_label_col,
"}. Perhaps there was a typo?\n")
}
}
# If top_n_hits is a number, re-arrange the dataframe and select the top n
if (checkmate::testCount(top_n_hits)) {
label_df <- label_df %>%
dplyr::arrange(dplyr::desc(!!rlang::sym(logged_p))) %>%
dplyr::slice(1:top_n_hits)
# Return the df
return(label_df)
} else if (is.null(top_n_hits)) {
# Top_n_hits is null, so just return full df
return(label_df)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.