## code to prepare `df_Q16658_WithOutOutliers` dataset goes here
library(magrittr)
# Infix negation of `%in%`: TRUE for each left-hand element that is absent
# from the right-hand side.
`%!in%` <- function(x, table) !(x %in% table)
# FASTA headers flagged as outliers, read from a text file as a character
# vector.
outliers <- scan(
  file = "data-raw/df_Q16658/outliers_fasta_headers.txt",
  what = "character"
)

# Lower-case keywords searched for in the annotation columns when filtering.
avoid_words <- c("delete", "fragment", "ubiquitin")
# Build the outlier-free dataset from `FascinRSCA::df_Q16658`:
#   1. keep rows where none of the three annotation columns contains one of
#      `avoid_words` (compared in lower case),
#   2. keep the first row for each (taxid, fasta_seq) combination,
#   3. drop rows whose `fasta_header` is listed in `outliers`.
df_Q16658_WithOutOutliers <- FascinRSCA::df_Q16658 %>%
  dplyr::filter(
    dplyr::if_all(
      c("Protein names", "Gene names", "Entry name"),
      # Missing annotations are turned into "" so that entries with an NA
      # in the annotation are not removed by the filter.
      ~ !stringr::str_detect(
        tidyr::replace_na(stringr::str_to_lower(.), ""),
        paste(avoid_words, collapse = "|")
      )
    )
  ) %>%
  dplyr::distinct(taxid, fasta_seq, .keep_all = TRUE) %>%
  dplyr::filter(fasta_header %!in% outliers)
# Add an `annotation` column that classifies every entry from its gene and
# protein names. The matching runs on upper-cased copies with missing values
# filled in, so the `Gene names` / `Protein names` columns stored in the
# dataset keep their original values.
#
# Outliers were already removed when the dataset was built above, so no row
# filtering is done here: the pulled vector must contain exactly one value per
# row for the `$<-` assignment to be valid.
df_Q16658_WithOutOutliers$annotation <- df_Q16658_WithOutOutliers %>%
  dplyr::mutate(
    `Gene names` = toupper(`Gene names`),
    `Protein names` = toupper(`Protein names`)
  ) %>%
  tidyr::replace_na(list(`Gene names` = "", `Protein names` = "unknown")) %>%
  dplyr::mutate(
    annotation = dplyr::case_when(
      # case_when() takes the first rule that matches, so the isoform-specific
      # FSCN2 rules have to come before the generic FSCN2 rule.
      stringr::str_detect(`Gene names`, "FSCN2") &
        stringr::str_detect(`Protein names`, "FASCIN.+2.+ISOFORM.+1") ~
        "fascin2a",
      stringr::str_detect(`Gene names`, "FSCN2") &
        stringr::str_detect(`Protein names`, "FASCIN.+2.+ISOFORM.+2") ~
        "fascin2b",
      stringr::str_detect(`Gene names`, "FSCN1") &
        stringr::str_detect(`Protein names`, "FASCIN") ~
        "fascin1",
      stringr::str_detect(`Gene names`, "FSCN2") &
        stringr::str_detect(`Protein names`, "FASCIN") ~
        "fascin2",
      # Fascin by protein name, but without a numbered FSCN gene.
      !stringr::str_detect(`Gene names`, "FSCN\\d") &
        stringr::str_detect(`Protein names`, "FASCIN") ~
        "fascin",
      stringr::str_detect(`Gene names`, "UNCHARACTERIZED") |
        stringr::str_detect(`Protein names`, "UNCHARACTERIZED") ~
        "uncharacterized",
      # "SINGED" counts unless it is later followed by "LIKE".
      # NOTE(review): "SN" is tested against the protein name and therefore
      # matches any name containing the letters SN -- confirm this is intended
      # rather than a check on the gene name.
      !stringr::str_detect(`Gene names`, "FSCN\\d") &
        (
          stringr::str_detect(`Protein names`, "SINGED(?!.+LIKE)") |
            stringr::str_detect(`Protein names`, "SN")
        ) ~
        "singed",
      # Fallback for entries that match none of the rules above.
      TRUE ~ "unknown"
    )
  ) %>%
  dplyr::pull(annotation)
# Save the finished dataset via usethis, overwriting any previously saved copy.
usethis::use_data(
  df_Q16658_WithOutOutliers,
  overwrite = TRUE
)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.