View source: R/core_mutSignatures_scr_5.R
attachMutType(mutData, ref_colName = "reference_allele", var_colName = "variant_allele", var2_colName = NULL, context_colName = "context", format = 1, mutType_dict = "alexa", mutType_colName = "mutType")
|
mutData |
|
ref_colName |
|
var_colName |
|
var2_colName |
|
context_colName |
|
format |
|
mutType_dict |
|
mutType_colName |
##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
## Attach a mutation-type label to every row of `mutData`.
##
## For each row the central base of the context string is compared with the
## reference allele. Rows are dropped (with a message) when the central base
## does not match the reference, or when no variant allele differs from the
## reference. The remaining rows get a label built from the flanking
## sequence, the reference base and the mutated base.
##
## Arguments:
##   mutData          data.frame or matrix holding the columns named below.
##   ref_colName      name of the reference-allele column.
##   var_colName      name of the variant-allele column.
##   var2_colName     optional name of a second variant-allele column; it is
##                    used when the first variant equals the reference (or is
##                    missing). NULL disables it.
##   context_colName  name of the column holding the sequence context; the
##                    mutated position is taken to be its central character.
##   format           1 -> "<pre>[<ref>><mut>]<post>",
##                    2 -> "<pre><post>.<ref>><mut>".
##   mutType_dict     "alexa": rows whose reference base is G or A are passed
##                    through revCompl(); "custom": rows whose reference base
##                    is G or T are; any other value: no transformation.
##   mutType_colName  name of the new column; must not already exist.
##
## Returns: `mutData` as a data.frame, restricted to the retained rows, with
## the key columns coerced to character and the new `mutType_colName` column
## appended.
##
## Raises: stop() on an invalid input table, format, var2 column or
## mutType column name.
##
## NOTE(review): revCompl() is not defined in this block; it is presumably the
## package's reverse-complement helper - confirm it accepts a single string.
function (mutData, ref_colName = "reference_allele", var_colName = "variant_allele",
    var2_colName = NULL, context_colName = "context", format = 1,
    mutType_dict = "alexa", mutType_colName = "mutType")
{
    ## ---- Input validation ----
    required_cols <- c(ref_colName, var_colName, context_colName)
    is_tabular <- is.data.frame(mutData) || is.matrix(mutData)
    if (!is_tabular ||
        sum(required_cols %in% colnames(mutData)) != 3) {
        stop("Issue with the input dataset. Make sure to feed in a data.frame or\n a matrix and double check the name of the fields pointing to the\n reference allele, the variant allele and the sequence context")
    }
    if (length(format) != 1 || !(format %in% c(1, 2))) {
        stop("Please, specify a valid format number (example: 1)")
    }
    if (!is.null(var2_colName)) {
        if (length(var2_colName) != 1 ||
            !(var2_colName %in% colnames(mutData))) {
            stop("Invalid var2 column")
        }
    }
    if (!is.character(mutType_colName) || length(mutType_colName) != 1) {
        stop("Bad mutType_colName")
    }
    if (mutType_colName %in% colnames(mutData)) {
        stop("mutType_colName already exists as column name in the current dataset")
    }

    ## Apply revCompl() one string at a time (as the original code did) and
    ## always hand back a plain, unnamed character vector.
    rev_compl_each <- function(x) {
        vapply(x, function(s) paste(revCompl(s), collapse = ""),
            character(1), USE.NAMES = FALSE)
    }

    ## ---- Normalise the table: data.frame with character key columns ----
    mutData <- data.frame(mutData, stringsAsFactors = FALSE,
        row.names = NULL)
    my_key_cols <- c(ref_colName, var_colName, var2_colName,
        context_colName)
    my_key_cols <- my_key_cols[!is.na(my_key_cols)]
    for (clmn in my_key_cols) {
        mutData[[clmn]] <- as.character(as.vector(mutData[[clmn]]))
    }

    ## ---- Split each context into prefix / central base / suffix ----
    ## Everything below is vectorised over rows, so no per-row progress dots
    ## are emitted any more.
    message("Assigning mutation types ", appendLF = FALSE)
    ref <- mutData[[ref_colName]]
    alt <- mutData[[var_colName]]
    ctx <- mutData[[context_colName]]
    ctx_len <- nchar(ctx)
    half_len <- (ctx_len - 1) / 2
    mid <- substr(ctx, half_len + 1, half_len + 1)
    seq_pre <- substr(ctx, 1, half_len)
    seq_post <- substr(ctx, half_len + 2, ctx_len)

    ## A row is usable only when the central context base equals the
    ## reference allele. Missing values make the row unusable instead of
    ## aborting the whole call.
    ref_matches_ctx <- !is.na(mid) & !is.na(ref) & mid == ref

    ## Mutated base: first variant if it differs from the reference,
    ## otherwise the second variant (when provided) if that one differs.
    mut_base <- rep(NA_character_, nrow(mutData))
    pick_alt <- ref_matches_ctx & !is.na(alt) & alt != ref
    mut_base[pick_alt] <- alt[pick_alt]
    if (!is.null(var2_colName)) {
        alt2 <- mutData[[var2_colName]]
        pick_alt2 <- ref_matches_ctx & !pick_alt & !is.na(alt2) &
            alt2 != ref
        mut_base[pick_alt2] <- alt2[pick_alt2]
    }
    message(". Done!", appendLF = TRUE)

    ## ---- Drop rows for which no mutation type could be assigned ----
    keep <- !is.na(mut_base)
    if (!all(keep)) {
        message(paste("Removing", sum(!keep), "positions."))
        mutData <- mutData[keep, , drop = FALSE]
    }
    base_wt <- mid[keep]
    base_mut <- mut_base[keep]
    seq_wt <- paste0(seq_pre, mid, seq_post)[keep]

    ## ---- Reverse-complement rows on the non-reference strand ----
    message("Now applying RevCompl transformation", appendLF = FALSE)
    if (mutType_dict == "alexa") {
        flip_bases <- c("G", "A")
    }
    else if (mutType_dict == "custom") {
        flip_bases <- c("G", "T")
    }
    else {
        flip_bases <- character(0)
    }
    flip <- which(base_wt %in% flip_bases)
    if (length(flip) > 0) {
        base_wt[flip] <- rev_compl_each(base_wt[flip])
        base_mut[flip] <- rev_compl_each(base_mut[flip])
        seq_wt[flip] <- rev_compl_each(seq_wt[flip])
    }
    message(". Done!", appendLF = TRUE)

    ## ---- Build the final label ----
    message("Final formatting", appendLF = FALSE)
    wt_len <- nchar(seq_wt)
    wt_half <- (wt_len - 1) / 2
    left_flank <- substr(seq_wt, 1, wt_half)
    right_flank <- substr(seq_wt, wt_half + 2, wt_len)
    if (length(seq_wt) == 0) {
        ## paste0() would recycle zero-length input to "", yielding one
        ## bogus label for an empty table.
        mut_type <- character(0)
    }
    else if (format == 1) {
        mut_type <- paste0(left_flank, "[", base_wt, ">", base_mut,
            "]", right_flank)
    }
    else {
        ## format == 2 (guaranteed by the validation above)
        mut_type <- paste0(left_flank, right_flank, ".", base_wt,
            ">", base_mut)
    }
    mutData[[mutType_colName]] <- mut_type
    message(". Done!", appendLF = TRUE)
    return(mutData)
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.