# library(tm)
#
# ids <- dataset
#
# library(dplyr)
# ids <- ids %>%
#   filter(`percolator q-value` < 0.01) %>%
#   select(sequence, `protein id`) %>%
#   group_by(sequence) %>%
#   summarise(all_matched_proteins = paste(`protein id`, sep = ",", collapse = ","))
# proteins <- ids[['all_matched_proteins']]
# names(proteins) <- ids[['sequence']]
#
# comma_tokens <- Regexp_Tokenizer(',')
# pp_as_corpus <- Corpus(VectorSource(ids[['all_matched_proteins']]))
# meta(pp_as_corpus)
# meta(pp_as_corpus, 'labels', 'indexed') <- names(proteins)
# pp_as_corpus
# pp_matrix <- DocumentTermMatrix(pp_as_corpus, control = list(tokenize = comma_tokens))
# nonsparse_pp_matrix <- as.matrix(pp_matrix)
# ?DocumentTermMatrix
# ?Corpus
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.