# Nothing
#' Mutation data file loader
#'
#' Reads a tab-separated mutation list, keeps the rows that belong to
#' `sample_name`, and completes the columns that are still missing.
#'
#' Processing steps:
#' \itemize{
#'   \item An `Alt` value of `"*"` is replaced by the first base of `Ref`.
#'   \item If the first `Neighborhood_sequence` value is `"-"`, `Mut_type`
#'     is derived from the `Ref`/`Alt` lengths (`"<n>-snv"`, `"<n>-del"`,
#'     `"<n>-ins"`) and `Neighborhood_sequence` is rebuilt from the
#'     reference genome as upstream flank + `Alt` + downstream flank.
#'     In that case a `Chr_original` column is added and `Chr` is given a
#'     `"chr"` prefix when the reference genome uses one.
#'   \item If `simple_repeat_list` is an existing file and the first
#'     `SimpleRepeat_TRF` value is neither `"Y"` nor `"N"`, every mutation
#'     whose position lies inside an interval of that file is flagged `"Y"`.
#'   \item `SimpleRepeat_TRF` is normalised to `"Y"`/`"N"`; `Ref`, `Alt` and
#'     `Neighborhood_sequence` are upper-cased; rows are ordered by `Chr`.
#' }
#'
#' @param mutation_file Path of the mutation information file. It must
#'   contain the columns `Sample`, `Chr`, `Pos`, `Ref`, `Alt` and
#'   `Neighborhood_sequence`.
#' @param sample_name Sample name.
#' @param ref_genome Reference genome for the data.
#' @param chr_no Reference genome chromosome number (human=24, mouse=22).
#' @param simple_repeat_list Optional, set simple repeat bed file path.
#' @return df_mutation, a data frame with one row per mutation of
#'   `sample_name`. An error is raised when a required column is missing
#'   or when no row is left for `sample_name`.
#' @importFrom dplyr %>%
#' @importFrom dplyr select
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr case_when
#' @importFrom dplyr arrange
#' @importFrom BiocGenerics as.data.frame
#' @importFrom stringr str_sub
#' @importFrom stringr str_detect
#' @importFrom stats complete.cases
#' @importFrom utils read.csv
#' @examples
#' fun_load_mutation(
#'   system.file("extdata", "mutation_list.tsv", package = "MicroSEC"),
#'   "sample",
#'   BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38,
#'   24
#' )
#' @export
fun_load_mutation <- function(mutation_file,
                              sample_name,
                              ref_genome,
                              chr_no,
                              simple_repeat_list = "") {
  # Placeholders for the column names referenced through dplyr data masking
  # (presumably kept to silence "no visible binding" notes in R CMD check).
  Sample <- NULL
  Chr <- NULL
  Ref <- NULL
  Alt <- NULL
  SimpleRepeat_TRF <- NULL
  Neighborhood_sequence <- NULL
  V1 <- NULL

  # Chromosome names in the naming style of the reference genome. Deciding on
  # the "chr" prefix (rather than on an exact "chr1"/"1" match) guarantees
  # that `chromosomes` is always defined.
  first_seqname <- as.character(ref_genome@user_seqnames[[1]])
  genome_uses_chr_prefix <- isTRUE(startsWith(first_seqname, "chr"))
  chromosomes <- paste0(
    if (genome_uses_chr_prefix) "chr" else "",
    c(seq_len(chr_no - 2), "X", "Y")
  )

  # load somatic mutation list
  # Ref/Alt are forced to character: without this, a column consisting only
  # of "T" (or "F") alleles is type-converted to logical by read.csv.
  df_mutation <- read.csv(mutation_file,
                          stringsAsFactors = FALSE,
                          header = TRUE,
                          check.names = FALSE,
                          sep = "\t",
                          colClasses = c(Ref = "character",
                                         Alt = "character"))
  required_cols <- c("Sample", "Chr", "Pos", "Ref", "Alt",
                     "Neighborhood_sequence")
  missing_cols <- setdiff(required_cols, colnames(df_mutation))
  if (length(missing_cols) > 0) {
    stop("Mutation file lacks required column(s): ",
         paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }

  # filter() also drops rows whose Sample is NA.
  df_mutation <- df_mutation %>%
    filter(Sample == sample_name)
  df_mutation <- df_mutation[order(df_mutation$Chr, df_mutation$Pos), ]
  if (nrow(df_mutation) == 0) {
    stop("No mutation in the mutation file, exit.", call. = FALSE)
  }

  # data formatting
  df_mutation <- df_mutation %>%
    mutate(Alt = case_when(
      Alt == "*" ~ str_sub(Ref, 1, 1),
      TRUE ~ Alt
    ))

  # "-" in the first row marks a file whose Mut_type and
  # Neighborhood_sequence still have to be computed.
  needs_neighborhood <- df_mutation$Neighborhood_sequence[[1]] == "-"
  if (needs_neighborhood) {
    ref_len <- nchar(df_mutation$Ref)
    alt_len <- nchar(df_mutation$Alt)
    mut_kind <- ifelse(ref_len == alt_len, "snv",
                       ifelse(ref_len > alt_len, "del", "ins"))
    # For indels the length excludes one base of the longer allele.
    mut_len <- ifelse(mut_kind == "snv", ref_len,
                      ifelse(mut_kind == "del", ref_len - 1L, alt_len - 1L))
    df_mutation$Mut_type <- ifelse(is.na(mut_kind),
                                   NA_character_,
                                   paste0(mut_len, "-", mut_kind))
  }

  if (!"SimpleRepeat_TRF" %in% colnames(df_mutation)) {
    df_mutation$SimpleRepeat_TRF <- "-"
  }
  if (file.exists(simple_repeat_list) &&
        !df_mutation$SimpleRepeat_TRF[1] %in% c("Y", "N")) {
    simple_repeat_info <- read.csv(simple_repeat_list,
                                   stringsAsFactors = FALSE,
                                   header = FALSE,
                                   check.names = FALSE,
                                   sep = "\t")[, 1:3]
    simple_repeat_info <- simple_repeat_info %>%
      filter(V1 %in% chromosomes)
    # Split once per chromosome so each mutation only scans its own
    # chromosome's intervals.
    repeats_by_chr <- split(simple_repeat_info, simple_repeat_info$V1)
    # NOTE(review): Chr is matched exactly as written in the mutation file;
    # the "chr" prefix is only normalised further below. Confirm that the
    # mutation file and the repeat file use the same chromosome naming.
    in_repeat <- vapply(seq_len(nrow(df_mutation)), function(k) {
      chr_key <- as.character(df_mutation$Chr[k])
      if (is.na(chr_key)) {
        return(FALSE)
      }
      repeats <- repeats_by_chr[[chr_key]]
      if (is.null(repeats)) {
        return(FALSE)
      }
      any(repeats$V2 <= df_mutation$Pos[k] &
            repeats$V3 >= df_mutation$Pos[k],
          na.rm = TRUE)
    }, logical(1))
    df_mutation$SimpleRepeat_TRF[in_repeat] <- "Y"
  }
  # Only "Y" and TRUE count as a repeat; NA, "-", "", "N" and anything else
  # become "N".
  df_mutation <- df_mutation %>%
    mutate(SimpleRepeat_TRF = case_when(
      is.na(SimpleRepeat_TRF) ~ "N",
      SimpleRepeat_TRF == "Y" ~ "Y",
      SimpleRepeat_TRF == TRUE ~ "Y",
      TRUE ~ "N"
    ))

  if (needs_neighborhood) {
    # Extract ref_genome[[chr]][start:end] for every row, loading each
    # chromosome only once.
    fetch_sequences <- function(chr, start, end) {
      sequences <- character(length(chr))
      for (chr_name in unique(chr)) {
        chr_seq <- ref_genome[[chr_name]]
        rows <- which(chr == chr_name)
        sequences[rows] <- vapply(rows, function(i) {
          as.character(chr_seq[start[i]:end[i]])
        }, character(1))
      }
      sequences
    }

    df_mutation$Chr <- as.character(df_mutation$Chr)
    df_mutation$Chr_original <- df_mutation$Chr
    if (genome_uses_chr_prefix) {
      # Prefix only the rows that lack it, instead of deciding from row 1.
      lacks_prefix <- which(!startsWith(df_mutation$Chr, "chr"))
      df_mutation$Chr[lacks_prefix] <-
        paste0("chr", df_mutation$Chr[lacks_prefix])
    }
    pos <- as.integer(df_mutation$Pos)

    # Upstream flank: 20 bases for an snv, 19 for an indel, ending at Pos - 1.
    upstream_start <- ifelse(mut_kind == "snv", pos - 20L, pos - 19L)
    upstream_end <- pos - 1L
    # Downstream flank: 20 bases starting after the reference bases covered
    # by the mutation (none beyond Pos for an insertion).
    skipped <- ifelse(mut_kind == "ins", 0L,
                      ifelse(mut_kind == "del", mut_len, mut_len - 1L))
    downstream_start <- pos + skipped + 1L
    downstream_end <- pos + skipped + 20L

    pre_neighbor <- fetch_sequences(df_mutation$Chr,
                                    upstream_start, upstream_end)
    post_neighbor <- fetch_sequences(df_mutation$Chr,
                                     downstream_start, downstream_end)
    df_mutation$Neighborhood_sequence <-
      paste0(pre_neighbor, df_mutation$Alt, post_neighbor)
  }

  df_mutation$Pos <- as.integer(df_mutation$Pos)
  df_mutation %>%
    mutate(Ref = toupper(Ref),
           Alt = toupper(Alt),
           Neighborhood_sequence = toupper(Neighborhood_sequence)) %>%
    arrange(Chr)
}
# NOTE(review): the bare NULL at the end of this block is presumably the
# placeholder expression that the namespace tags below get attached to;
# keep it directly after the "end" marker — TODO confirm against usethis.
# The following block is used by usethis to automatically manage
# roxygen namespace tags. Modify with care!
## usethis namespace: start
## usethis namespace: end
NULL
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.