#' Title
#'
#' @param df_long
#' @param ref
#' @param pos_col
#' @param seq_col
#' @param name_col
#' @param seq_original
#' @param match_symbol
#' @param mismatch_symbol
#' @param change_pattern
#' @param pattern_mismatch_as
#' @param change_ref
#' @param ref_mismatch_as
#' @param insertion_as
#'
#' @return
#' @export
#'
#' @examples
compare_seq_df_long <- function(df_long,
ref,
pos_col = "position",
seq_col = "seq",
name_col = "seq.name",
seq_original = "seq_original",
match_symbol = ".",
mismatch_symbol = "x",
change_pattern = F,
pattern_mismatch_as = c("base", "mismatch_symbol"),
change_ref = F,
ref_mismatch_as = c("base", "mismatch_symbol"),
insertion_as = c("base", "insertion")) {
# insertion_as can be base or any other character, like "x"
if (!change_pattern && !change_ref) {
if (!is.null(seq_original)) {
if (!seq_col %in% names(df_long)) {
stop(seq_col, " not found in df_long.")
}
df_long[[seq_original]] <- df_long[[seq_col]]
}
return(df_long)
}
if (!seq_col %in% names(df_long)) {
stop(seq_col, " not found in df_long.")
}
if (!pos_col %in% names(df_long)) {
stop(pos_col, " not found in df_long.")
}
if (!name_col %in% names(df_long)) {
stop(name_col, " not found in df_long.")
}
if (missing(ref)) {
stop("ref is missing.")
}
if (!ref %in% df_long[,name_col,drop=T]) {
stop(ref, " not in ", name_col, " column of df_long.")
}
pattern_mismatch_as <- match.arg(pattern_mismatch_as, c("base", "mismatch_symbol"))
ref_mismatch_as <- match.arg(ref_mismatch_as, c("base", "mismatch_symbol"))
insertion_as <- match.arg(insertion_as, c("base", "insertion"))
name_order <- levels(df_long[,name_col,drop=T])
if (is.null(name_order)) {
name_order <- unique(df_long[,name_col,drop=T])
}
non_ref <- name_order[which(name_order != ref)]
df2 <-
df_long %>%
dplyr::select(!!rlang::sym(pos_col), !!rlang::sym(seq_col), !!rlang::sym(name_col)) %>%
tidyr::pivot_wider(names_from = !!rlang::sym(name_col), values_from = !!rlang::sym(seq_col))
# matches to pattern
if (change_pattern) {
df2 <- df2 %>% tidyr::pivot_longer(cols = dplyr::all_of(names(.)[which(!names(.) %in% c(ref, pos_col))]))
if (pattern_mismatch_as == "base") {
df2 <- dplyr::mutate(df2, value = ifelse(value == !!rlang::sym(ref), match_symbol, value))
} else if (pattern_mismatch_as == "mismatch_symbol") {
df2 <- dplyr::mutate(df2, value = ifelse(value == !!rlang::sym(ref), match_symbol, mismatch_symbol))
}
if (insertion_as != "base") {
df2 <- dplyr::mutate(df2, value = ifelse(!!rlang::sym(ref) == "-" & value != "-", insertion_as, value))
}
df2 <- tidyr::pivot_wider(df2, names_from = name, values_from = value)
}
# matches to ref
if (change_ref) {
if (ref_mismatch_as == "base") {
mismatch_replace <- df2[[ref]]
} else if (ref_mismatch_as == "mismatch_symbol") {
mismatch_replace <- rep(mismatch_symbol, nrow(df2))
}
if (change_pattern) {
df2[[ref]] <- ifelse(apply(df2[,c(ref, non_ref)], 1, function(x) length(unique(x[intersect(which(!is.na(x)), which(x != match_symbol))]))) == 1, match_symbol, mismatch_replace)
} else {
df2[[ref]] <- ifelse(apply(df2[,c(ref, non_ref)], 1, function(x) length(unique(x[which(!is.na(x))]))) == 1, match_symbol, mismatch_replace)
}
if (insertion_as != "base") {
# any or all? any(x[-1] == "-")
df2[[ref]] <- ifelse(apply(df2[,c(ref, non_ref)], 1, function(x) (!x[1] %in% c("-", match_symbol, mismatch_symbol) && any(x[-1][which(!is.na(x[-1]))] == "-"))), insertion_as, df2[[ref]])
}
}
df2 <- df2 %>% tidyr::pivot_longer(cols = dplyr::all_of(names(.)[which(names(.) != pos_col)]), names_to = name_col, values_to = seq_col)
if (!is.null(seq_original)) {
df2 <- dplyr::left_join(df2, df_long %>% dplyr::rename({{seq_original}} := !!rlang::sym(seq_col)), by = dplyr::join_by(!!rlang::sym(pos_col), !!rlang::sym(name_col)))
}
df2 <- dplyr::mutate(df2, {{name_col}} := factor(!!rlang::sym(name_col), levels = name_order))
# join other columns from initial data frame
if (any(!names(df_long)[-which(names(df_long) == seq_col)] %in% names(df2))) {
df2 <- dplyr::left_join(df2, dplyr::select(df_long, -!!rlang::sym(seq_col)), by = dplyr::join_by(!!rlang::sym(pos_col), !!rlang::sym(name_col)))
}
return(df2)
}
## other procedure:
## assign matches and mismatches
## similar procedure as in igsc::MultiplePairwiseAlignmentsToOneSubject
'if (any(unlist(matches_to_origin_and_feature))) {
match_mismatch_list <- lapply(stats::setNames(names(df0)[-c(1,2)], names(df0)[-c(1,2)]), function(x) df0[,x] == df0[,"origin"]) # subject seq in df (not df.original) may already contain "insertion"; for overlapping pattern where one receives an insertion and the other not, this is relevant
if (any(sapply(matches_to_origin_and_feature, "[", 1))) {
# do it outside of loop below to avoid double calculation
matches <- purrr::pmap_lgl(match_mismatch_list, function(...) any_false(unlist(list(...))))
}
df0 <- purrr::map(setNames(matches_to_origin_and_feature, sapply(matches_to_origin_and_feature, function(x) paste(ifelse(x, "match", "base"), collapse = "_"))), function(y) {
if (any(y)) {
df0_match <- df0
if (y[2]) {
# matches/mismatches to patterns
for (i in names(match_mismatch_list)) {
df0_match[,i] <- ifelse(match_mismatch_list[[i]], "match", "mismatch")
}
}
if (y[1]) {
# matches/mismatches to subject
df0_match[which(matches),"origin"] <- "match"
df0_match[which(!matches),"origin"] <- "mismatch"
}
return(df0_match)
} else {
return(df0)
}
})
} else {
df0 <- list("base_base" = df0)
}'
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.