#' @import dplyr
#' @importFrom tidyr unnest
#' @importFrom stringr str_detect str_match str_replace_all
#' @importFrom tibble as_tibble
#' @importFrom cleaver cleavageRanges cleave
#'
#' @include xlinkTbl.R
.xcombr_db <- function(x,
                       xlSpecificity = "Amine:Amine",
                       linkType = "both",
                       minLength = 6,
                       maxLength = 24,
                       peptideTerminalLink = FALSE,
                       proteinTerminalLink = TRUE,
                       customSpecificity = NULL,
                       digest = TRUE,
                       ...,
                       max50 = TRUE) {
  # Build a set of concatenated crosslinked peptide pairs.
  #
  # Arguments:
  #   x                    sequences to process (presumably an AAStringSet --
  #                        TODO confirm against callers). Passed to
  #                        cleaver::cleave()/cleavageRanges() when `digest`
  #                        is TRUE; otherwise names(x) and as.character(x)
  #                        are used as-is.
  #   xlSpecificity        name of a row in `xlinkTbl` (matched
  #                        case-insensitively) or "custom".
  #   linkType             "inter", "intra" or "both" (case-insensitive).
  #   minLength, maxLength inclusive bounds on peptide width.
  #   peptideTerminalLink  if TRUE every peptide is wrapped as n<seq>c.
  #   proteinTerminalLink  if TRUE (and peptideTerminalLink is FALSE) only the
  #                        first/last peptide of each protein gets n / c.
  #   customSpecificity    two regular expressions, used when
  #                        xlSpecificity is "custom".
  #   digest               whether to digest `x` with cleaver.
  #   ...                  forwarded to cleave() and cleavageRanges().
  #   max50                validated by .checkArgs() but not otherwise used in
  #                        this function.
  #
  # Returns: the result of AAStringSet() on the concatenated pair sequences,
  # named "<accession> a=<seq1> <range1> <name1> cx A=<seq2> <range2> <name2>".

  # Define valid arg values --------------------------------------------------
  linkTypeValues <- c("inter", "intra", "both")
  xlSpecificityValues <- c(xlinkTbl$name, "custom")

  # Check args ---------------------------------------------------------------
  # `digest` is checked here because the .checkArgs() call below does not
  # forward it.
  if (length(digest) != 1 || !is.logical(digest) || is.na(digest)) {
    stop("digest must be a logical value (TRUE/FALSE)")
  }
  .checkArgs(xlSpecificity,
             linkType,
             minLength,
             maxLength,
             peptideTerminalLink,
             proteinTerminalLink,
             customSpecificity,
             max50,
             # possible values for arguments:
             linkTypeValues,
             xlSpecificityValues)
  # Validation is case-insensitive, so normalise before the exact comparison
  # against the "intra"/"inter" labels further down.
  linkType <- tolower(linkType)

  # Retrieve regular expressions based on `xlSpecificity` ---------------------
  if (tolower(xlSpecificity) == "custom") {
    xlReg1 <- customSpecificity[1]
    xlReg2 <- customSpecificity[2]
  } else {
    isChosen <- tolower(xlinkTbl$name) == tolower(xlSpecificity)
    xlReg1 <- xlinkTbl$regex1[isChosen]
    xlReg2 <- xlinkTbl$regex2[isChosen]
  }

  # Perform digest using cleaver (but return a tibble) -------------------------
  if (digest) {
    seqTbl <- as_tibble(as.data.frame(cleave(x, ..., unique = FALSE)))
    pos <- as_tibble(as.data.frame(cleavageRanges(x, ...)))
    seqTbl <- cbind(seqTbl, pos[, c("start", "end", "width")])
  } else { # or treat every input sequence as a single peptide
    seqTbl <- tibble(group = seq_along(x),
                     group_name = names(x),
                     value = as.character(x),
                     start = 1,
                     end = nchar(value),
                     width = nchar(value))
  }
  seqTbl <- seqTbl %>%
    rename(name = group_name,
           seq = value) %>%
    select(-group) %>%
    mutate(seq = toupper(seq))

  # Mark termini ---------------------------------------------------------------
  # Lower-case "n"/"c" markers are added after upper-casing the residues and
  # are stripped again once the specificity regexes have been applied
  # (presumably so the regexes can target termini -- confirm against xlinkTbl).
  if (peptideTerminalLink) {
    seqTbl <- seqTbl %>%
      mutate(seq = paste0("n", seq, "c"))
  } else if (proteinTerminalLink) {
    seqTbl <- seqTbl %>%
      group_by(name) %>%
      mutate(seq = ifelse(start == min(start), paste0("n", seq), seq),
             seq = ifelse(end == max(end), paste0(seq, "c"), seq))
  }

  # Filter peptides based on args ----------------------------------------------
  seqTbl <- seqTbl %>%
    ungroup() %>%
    mutate(range = paste0("{", start, "-", end, "}")) %>%
    filter(width >= minLength,
           width <= maxLength) %>%
    select(-start, -end, -width)

  # Create one table per side of the crosslink ---------------------------------
  # Peptides matching `regex`, with terminal markers removed and the ranges of
  # identical peptides within a protein collapsed into one string.
  .linkSide <- function(regex) {
    seqTbl %>%
      filter(str_detect(seq, regex)) %>%
      mutate(seq = str_replace_all(seq, "[nc]", "")) %>%
      group_by(name, seq) %>%
      summarize(range = paste(range, collapse = ",")) %>%
      ungroup()
  }
  links <- .linkSide(xlReg1)
  if (xlReg1 != xlReg2) {
    # Peptides matching both regexes appear in both tables; drop exact repeats.
    links <- distinct(bind_rows(links, .linkSide(xlReg2)))
  }

  # Create crosslinked pairs ---------------------------------------------------
  # Pair unique peptide sequences only; the joins below fan each sequence out
  # to every protein that contains it.
  peptides <- unique(links$seq)
  combos <- as_tibble(expand.grid(seq1 = peptides,
                                  seq2 = peptides,
                                  stringsAsFactors = FALSE)) %>%
    left_join(links, by = c("seq1" = "seq")) %>%
    left_join(links, by = c("seq2" = "seq"), suffix = c("1", "2")) %>%
    distinct() %>%
    mutate(shortName1 = str_match(name1, "(^\\S+)")[, 2],
           shortName2 = str_match(name2, "(^\\S+)")[, 2],
           # Built row by row so each accession holds exactly the two short
           # names of its own pair, sorted.
           accession = vapply(seq_along(shortName1),
                              function(i) {
                                paste(sort(c(shortName1[i], shortName2[i])),
                                      collapse = "_")
                              },
                              character(1)),
           type = ifelse(name1 == name2, "intra", "inter"),
           seq = paste0(seq1, seq2),
           description = paste0("a=", seq1, " ", range1, " ", name1, " cx ",
                                "A=", seq2, " ", range2, " ", name2),
           header = paste0(accession, " ", description))
  if (linkType != "both") {
    combos <- filter(combos, type == linkType)
  }
  combos <- select(combos, seq, header)

  # Return AAStringSet ---------------------------------------------------------
  seqs <- combos$seq
  names(seqs) <- combos$header
  AAStringSet(seqs)
}
# Argument Checking
.checkArgs <- function(xlSpecificity,
                       linkType,
                       minLength,
                       maxLength,
                       peptideTerminalLink,
                       proteinTerminalLink,
                       customSpecificity,
                       max50,
                       linkTypeValues,
                       xlSpecificityValues,
                       digest = TRUE) {
  # Validate the user-facing arguments; stops with a descriptive error on the
  # first invalid one and returns NULL invisibly otherwise.
  #
  #   linkTypeValues, xlSpecificityValues  character vectors of accepted
  #                                        values (compared case-insensitively).
  #   digest                               optional; defaults to TRUE so that
  #                                        callers passing only the first ten
  #                                        arguments keep working.

  # TRUE for a single non-missing logical value.
  .isFlag <- function(v) {
    length(v) == 1 && is.logical(v) && !is.na(v)
  }
  # TRUE for a single finite, whole, non-negative number.
  .isCount <- function(v) {
    length(v) == 1 && is.numeric(v) && is.finite(v) &&
      v == trunc(v) && v >= 0
  }

  # xlSpecificity
  if (!is.character(xlSpecificity) || length(xlSpecificity) != 1) {
    stop("xlSpecificity must be a character vector of length 1")
  } else if (!(tolower(xlSpecificity) %in% tolower(xlSpecificityValues))) {
    stop("Unrecognized xlSpecificity. Refer to ?xcomb for options.")
  }
  # linkType
  if (!is.character(linkType) || length(linkType) != 1) {
    stop("linkType must be a character vector of length 1")
  } else if (!(tolower(linkType) %in% tolower(linkTypeValues))) {
    stop("Unrecognized linkType. Must be one of: ",
         paste(linkTypeValues, collapse = ", "), ".")
  }
  # minLength
  if (!.isCount(minLength)) {
    stop("minLength must be a single positive integer value")
  }
  # maxLength
  if (!.isCount(maxLength)) {
    stop("maxLength must be a single positive integer value")
  } else if (maxLength < minLength) {
    stop("maxLength must be greater than or equal to minLength")
  }
  # peptideTerminalLink
  if (!.isFlag(peptideTerminalLink)) {
    stop("peptideTerminalLink must be a logical value (TRUE/FALSE)")
  }
  # proteinTerminalLink
  if (!.isFlag(proteinTerminalLink)) {
    stop("proteinTerminalLink must be a logical value (TRUE/FALSE)")
  }
  # digest
  if (!.isFlag(digest)) {
    stop("digest must be a logical value (TRUE/FALSE)")
  }
  # customSpecificity (only required for the "custom" specificity)
  if (tolower(xlSpecificity) == "custom") {
    if (is.null(customSpecificity)) {
      stop("customSpecificity is required when xlSpecificity = 'custom'.")
    } else if (!is.character(customSpecificity) ||
               length(customSpecificity) != 2 ||
               anyNA(customSpecificity)) {
      stop("customSpecificity must be a character vector of length 2.")
    }
  }
  # max50
  if (!.isFlag(max50)) {
    stop("max50 must be a logical value (TRUE/FALSE).")
  }
  invisible(NULL)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.