# probabilities from drake repo
weights <- readRDS("./inst/weights.Rds")
# Semicolon-separated file with a header row; "," is declared as the decimal mark.
vis_dat_fit_best <- read.table("./inst/vis_dat_fit_best.csv",
                               sep = ";", dec = ",", header = TRUE)
# Replace the first "," of every value with "." in all columns. sub() returns
# character vectors, so after this step every column is of type character.
vis_dat_fit_best[] <- lapply(vis_dat_fit_best,
                             function(column) sub(",", ".", column))
# Turn the metric columns back into numbers; the remaining columns stay character.
for (col in c("AUC_mean", "MCC_mean", "Sens_mean", "Spec_mean",
              "AUC_sd", "MCC_sd", "Sens_sd", "Spec_sd",
              "Cosine_similarity")) {
  vis_dat_fit_best[[col]] <- as.numeric(vis_dat_fit_best[[col]])
}
#
# Root directory against which the helper script, the amino-acid groupings
# and the FASTA files below are resolved.
PATH <- "~/amylogram/AmyloGramAnalysis/"
library(seqinr)
source(paste0(PATH, "./functions/encode_amyloids.R"))
# NOTE(review): aa_groups is presumably created by this load() and
# string2list() by the source() above -- neither is defined in this file.
load(paste0(PATH, "./data/aa_groups.RData"))
aa_groups <- string2list(aa_groups)
# Positive and negative sequences concatenated into one list.
raw_seqs_list <- c(
  read.fasta(paste0(PATH, "./data/amyloid_pos_full.fasta"), seqtype = "AA"),
  read.fasta(paste0(PATH, "./data/amyloid_neg_full.fasta"), seqtype = "AA")
)
# sequences longer than 5 aa and shorter than 26 aa
raw_seq_lengths <- lengths(raw_seqs_list)
purified_seqs_id <- raw_seq_lengths > 5 & raw_seq_lengths < 26
seqs_list <- raw_seqs_list[purified_seqs_id]
# One row per sequence, lower-cased, right-padded with NA to the longest
# retained sequence. The target width is computed once instead of once per
# sequence, and vapply() guarantees a character matrix of that height.
max_seq_length <- max(lengths(seqs_list))
seqs_m <- tolower(t(vapply(
  seqs_list,
  function(s) c(s, rep(NA_character_, max_seq_length - length(s))),
  character(max_seq_length)
)))
# Read the positive and the negative sequence sets separately.
raw_seqs_positive <- read.fasta(paste0(PATH, "./data/amyloid_pos_full.fasta"), seqtype = "AA")
raw_seqs_negative <- read.fasta(paste0(PATH, "./data/amyloid_neg_full.fasta"), seqtype = "AA")
# Keep sequences longer than 5 aa and shorter than 26 aa.
seqs_list_positive <- raw_seqs_positive[lengths(raw_seqs_positive) > 5 & lengths(raw_seqs_positive) < 26]
seqs_list_negative <- raw_seqs_negative[lengths(raw_seqs_negative) > 5 & lengths(raw_seqs_negative) < 26]
# One row per sequence, lower-cased, right-padded with NA to the longest
# sequence of the respective set. The width is computed once per set rather
# than once per sequence, and vapply() pins the result to a character matrix.
max_len_positive <- max(lengths(seqs_list_positive))
seqs_m_pos <- tolower(t(vapply(
  seqs_list_positive,
  function(s) c(s, rep(NA_character_, max_len_positive - length(s))),
  character(max_len_positive)
)))
max_len_negative <- max(lengths(seqs_list_negative))
seqs_m_neg <- tolower(t(vapply(
  seqs_list_negative,
  function(s) c(s, rep(NA_character_, max_len_negative - length(s))),
  character(max_len_negative)
)))
# Symbol counts over all matrix cells; table() leaves the NA padding out.
occ_positive <- table(seqs_m_pos)
occ_negative <- table(seqs_m_neg)
# Delete "-" from the occurrences. It is removed by name: dropping the first
# table entry by position would discard a real residue whenever "-" is absent
# or not first. The same filter is applied to the negative set and is a no-op
# when that set contains no "-".
occ_positive <- occ_positive[names(occ_positive) != "-"]
occ_negative <- occ_negative[names(occ_negative) != "-"]
# Relative frequency of every remaining symbol within its set.
probs_positive <- occ_positive / sum(occ_positive)
probs_negative <- occ_negative / sum(occ_negative)
# biogram::degenerate
#' Aggregate per-symbol probabilities into per-group probabilities
#'
#' The names of each probability vector are translated to group labels with
#' `biogram::degenerate()`, and the probabilities are summed per group.
#' Groups are matched by their ordinal position (1, 2, ...) against the labels
#' returned by `degenerate()`, exactly as before.
#'
#' @param encoding List of character vectors, one per group.
#' @param probs_positive Named numeric vector (or 1-d table) of symbol
#'   probabilities for the positive set.
#' @param probs_negative Same for the negative set.
#' @return A list with elements `positive` and `negative`, each a numeric
#'   vector with one summed probability per group of `encoding`.
create_encoded_probabilites <- function(encoding, probs_positive, probs_negative) {
  sum_by_group <- function(probs) {
    group_of <- biogram::degenerate(names(probs), encoding)
    vapply(
      seq_along(encoding),
      # which() discards positions where the comparison is NA (presumably
      # symbols degenerate() could not assign to any group). Indexing with a
      # logical NA would otherwise turn every group sum into NA.
      function(g) sum(probs[which(group_of == g)]),
      numeric(1)
    )
  }
  list(positive = sum_by_group(probs_positive),
       negative = sum_by_group(probs_negative))
}
# Two encodings from the literature, added as benchmarks. Each is a list of
# four groups of lower-case one-letter amino-acid codes; they differ only in
# where "f" is placed and in the order of "y" and "w" within the last group.
aa1 <- list(
  `1` = c("g", "a", "p", "v", "l", "i", "m"),
  `2` = c("k", "r", "h"),
  `3` = c("d", "e"),
  `4` = c("f", "w", "y", "s", "t", "c", "n", "q")
)
aa2 <- list(
  `1` = c("g", "a", "p", "v", "l", "i", "m", "f"),
  `2` = c("k", "r", "h"),
  `3` = c("d", "e"),
  `4` = c("s", "t", "c", "n", "q", "y", "w")
)
library(AmyloGram)
# Six-group encoding used for the "amylogram_encoding" entry, written out
# explicitly. The earlier assignment from AmyloGram_model[["enc"]] was
# overwritten by this literal before it was ever read, so it is dropped.
amylogram_model_encoding <- list(
  `1` = "g",
  `2` = c("k", "p", "r"),
  `3` = c("i", "l", "v"),
  `4` = c("f", "w", "y"),
  `5` = c("a", "c", "h", "m"),
  `6` = c("d", "e", "n", "q", "s", "t")
)
# Per-group probabilities of the positive and negative sets for every
# encoding. lapply() keeps the list names, so the result has the entries
# "amylogram_encoding", "aa1" and "aa2", each holding the list returned by
# create_encoded_probabilites().
encodingProbs <- lapply(
  list(amylogram_encoding = amylogram_model_encoding,
       aa1 = aa1,
       aa2 = aa2),
  create_encoded_probabilites,
  probs_positive = probs_positive,
  probs_negative = probs_negative
)
library(QuiPTsim)
# For every encoding, compare its positive and negative group-probability
# vectors with cosine_similarity() (presumably provided by QuiPTsim, attached
# just above -- confirm). The result is not stored.
lapply(encodingProbs, function(enc_probs) {
  cosine_similarity(enc_probs[["positive"]], enc_probs[["negative"]])
})
# Persist the per-encoding probabilities.
saveRDS(encodingProbs, "./inst/encodingProbs.Rds")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.