#' Read one partition file of labelled proteins
#'
#' @param x Path to a file readable by `Biostrings::readAAStringSet()`. Each
#'   record is expected to hold "<sequence>#<topology>".
#' @param i Position of the file in the list being iterated; it is stored
#'   zero-based (`i - 1`) in the `PARTITION` column.
#' @return A data frame including the columns `PROTEIN` (former row names),
#'   `SEQUENCE`, `TOPOLOGY` and the integer `PARTITION`.
read_partition <- function(x, i) {
  records <- base::as.data.frame(Biostrings::readAAStringSet(x))
  records <- tibble::rownames_to_column(records, var = "PROTEIN")
  # `x` below is the data-frame column holding the raw record text; it is
  # split at "#" into the residue string and its topology annotation.
  records <- tidyr::separate(records, x, c("SEQUENCE", "TOPOLOGY"), sep = "#")
  dplyr::mutate(records, PARTITION = base::as.integer(i - 1))
}
# Labelled proteins of every partition file shipped in the package's extdata
# directory, stacked into one table. PARTITION becomes a factor and all
# character columns are stripped of surrounding whitespace.
PROTEINS <- base::local({
  # The dots are escaped so they match a literal "." rather than any
  # character (e.g. "set160.0.labels").
  label_files <-
    base::system.file("extdata", package = "TMHMM") %>%
    base::dir(pattern = "set160\\.[0-9]\\.labels", full.names = TRUE)

  # Fail with a clear message instead of a later "object 'PARTITION' not
  # found" when the package data cannot be located.
  if (base::length(label_files) == 0) {
    base::stop(
      "No 'set160.<n>.labels' files found in the extdata directory of ",
      "package 'TMHMM'.",
      call. = FALSE
    )
  }

  label_files %>%
    # imap() passes the file's position as second argument; read_partition()
    # turns it into a zero-based partition id.
    purrr::imap(read_partition) %>%
    dplyr::bind_rows() %>%
    dplyr::mutate(PARTITION = forcats::as_factor(PARTITION)) %>%
    dplyr::mutate_if(base::is.character, stringr::str_trim)
})
# Predictions read from the packaged "160.predictions" file. Only the cleaned
# PREDICTION column is kept; its values are whitespace-trimmed.
PREDICTIONS <- base::local({
  prediction_file <-
    base::system.file("extdata/160.predictions", package = "TMHMM")
  raw_predictions <-
    base::as.data.frame(Biostrings::readAAStringSet(prediction_file))
  raw_predictions <-
    tibble::rownames_to_column(raw_predictions, var = "PROTEIN")

  # Remove every fragment matching the pattern from the record text (column
  # `x`). NOTE(review): presumably annotation residue embedded in the
  # records - confirm against the file format.
  cleaned <- dplyr::transmute(
    raw_predictions,
    PREDICTION = stringr::str_remove_all(x, "\\s+\\S+#\\s+\\S+0\\s")
  )
  dplyr::mutate_if(cleaned, base::is.character, stringr::str_trim)
})
# ---- Cross-validation of a three-state HMM ----
# For each partition 0..9 an HMM is estimated from the proteins of all other
# partitions and used to decode (Viterbi) the sequences of the held-out
# partition. The decoded state paths are collected in VALIDATION, partition
# after partition.

# Hidden states (topology labels) and emitted symbols (residue letters).
# They are loop-invariant and double as explicit factor levels below.
state_names <-
  base::c("i", "M", "o")
symbol_names <-
  base::c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P",
          "Q", "R", "S", "T", "V", "W", "Y")

#' Split strings into one token per letter
#'
#' A space is inserted in front of every ASCII letter, the leading space is
#' dropped and the string is split at the spaces.
#'
#' @param x Character vector.
#' @return A list with one character vector of tokens per element of `x`.
split_letters <- function(x) {
  x %>%
    # "[A-Za-z]" instead of "[aA-zZ]": the latter is the range A-z, which
    # also contains the punctuation characters between "Z" and "a".
    stringr::str_replace_all("([A-Za-z])", " \\1") %>%
    stringr::str_replace(" ([A-Za-z])", "\\1") %>%
    stringr::str_split(" ")
}

#' Stop when `values` contains anything outside `allowed`
#'
#' @param values Character vector to check.
#' @param allowed Character vector of permitted values.
#' @param what Short description used in the error message.
#' @return `values`, invisibly.
assert_known <- function(values, allowed, what) {
  unknown <- base::setdiff(values, allowed)
  if (base::length(unknown) > 0) {
    base::stop(
      "Unexpected ", what, " in training data: ",
      base::paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  base::invisible(values)
}

#' Train on all partitions but `i` and decode partition `i`
#'
#' Reads the global `PROTEINS`, `state_names` and `symbol_names`.
#'
#' @param i Partition id to hold out.
#' @return Character vector with one decoded state path per held-out protein.
predict_partition <- function(i) {
  training <- dplyr::filter(PROTEINS, PARTITION != i)
  held_out <- dplyr::filter(PROTEINS, PARTITION == i)

  # Per-protein state sequences, computed once and reused below.
  topologies <- split_letters(training[["TOPOLOGY"]])

  # Consecutive state pairs within each protein: everything but the last
  # state is a transition source, everything but the first a target.
  from <- topologies %>%
    purrr::map(function(s) utils::head(s, -1)) %>%
    base::unlist(use.names = FALSE)
  to <- topologies %>%
    purrr::map(function(s) utils::tail(s, -1)) %>%
    base::unlist(use.names = FALSE)

  states <- base::unlist(topologies, use.names = FALSE)
  symbols <- training[["SEQUENCE"]] %>%
    split_letters() %>%
    base::unlist(use.names = FALSE)

  assert_known(states, state_names, "topology states")
  assert_known(symbols, symbol_names, "residue symbols")

  # HMM::initHMM() copies the matrices by position, so rows and columns must
  # follow state_names / symbol_names exactly. Explicit factor levels
  # guarantee that; the default ordering of table() depends on the locale's
  # collation of "i", "M" and "o".
  transition_probabilities <-
    base::table(
      from = base::factor(from, levels = state_names),
      to = base::factor(to, levels = state_names)
    ) %>%
    base::prop.table(1)
  emission_probabilities <-
    base::table(
      states = base::factor(states, levels = state_names),
      symbols = base::factor(symbols, levels = symbol_names)
    ) %>%
    base::prop.table(1)

  hmm_model <-
    HMM::initHMM(state_names, symbol_names,
                 transProbs = transition_probabilities,
                 emissionProbs = emission_probabilities)

  # Most probable state path per held-out sequence, collapsed to one string.
  held_out[["SEQUENCE"]] %>%
    split_letters() %>%
    purrr::map(~ HMM::viterbi(hmm_model, .x)) %>%
    purrr::map_chr(base::paste, collapse = "")
}

# One result vector per fold, concatenated once instead of growing a vector
# inside a loop.
VALIDATION <-
  purrr::map(0:9, predict_partition) %>%
  base::unlist(use.names = FALSE)
# Assemble the final data set: the labelled proteins, the packaged
# predictions and the cross-validated state paths side by side.
VALIDATION <-
  base::as.data.frame(VALIDATION, stringsAsFactors = FALSE)

# Columns are combined by position, so all three tables must share the same
# row order.
TMHMM <- dplyr::bind_cols(PROTEINS, PREDICTIONS, VALIDATION)
# PROTEIN is turned into a factor first, which keeps it out of the
# upper-casing applied to the remaining character columns.
TMHMM <- dplyr::mutate(TMHMM, PROTEIN = forcats::as_factor(PROTEIN))
TMHMM <- dplyr::mutate_if(TMHMM, base::is.character, stringr::str_to_upper)

# Store the object as package data.
usethis::use_data(TMHMM)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.