Nothing
.remove.ext <- function(.str) {
# gsub(pattern = '.*/|[.].*$', replacement = '', x = .str)
gsub(pattern = ".*/|[.](txt|tsv|csv)$|([.](txt|tsv|csv))?[.](gz|bzip|bzip2|bz2)$", replacement = "", x = .str)
}
.detect_format <- function(.filename) {
res_format <- NA
f <- file(.filename, "r")
l <- readLines(f, 1)
# use 2nd line of file for JSON formats
l2 <- ifelse(str_trim(l) == "{", readLines(f, 1), NA)
close(f)
if (identical(l, character(0))) {
res_format <- NA
} else if (any(str_detect(l, c("MiTCRFullExport", "mitcr")))) {
res_format <- "mitcr"
} else if (str_detect(l, "CDR3 amino acid sequence") && str_detect(l, "V segment") && !str_detect(l, "Good events")) {
res_format <- "mitcr"
} else if (str_detect(l, "CDR3 amino acid sequence") && str_detect(l, "V segment") && str_detect(l, "Good events")) {
res_format <- "migec"
} else if (str_detect(l, "v.end.in.cdr3") && str_detect(l, "cdr3aa")) {
res_format <- "migmap"
} else if (str_detect(l, "CDR3.amino.acid.sequence") && str_detect(l, "Umi.count")) {
res_format <- "tcr"
} else if (str_detect(tolower(l), "junction start") && str_detect(tolower(l), "v-d-j-region end") && str_detect(tolower(l), "v-region")) {
res_format <- "imgt"
} else if (str_detect(tolower(l), "v_resolved") && str_detect(tolower(l), "amino_acid")) {
res_format <- "immunoseq"
} else if (str_detect(tolower(l), "maxresolved")) {
res_format <- "immunoseq"
} else if (str_detect(tolower(l), "v_gene") && str_detect(tolower(l), "templates") && str_detect(tolower(l), "amino_acid")) {
res_format <- "immunoseq"
} else if (str_detect(tolower(l), "allvalignment") && str_detect(tolower(l), "vhit")) {
res_format <- "mixcr"
} else if (str_detect(tolower(l), "bestvhit") && str_detect(tolower(l), "bestjhit")) {
res_format <- "mixcr"
} else if (str_detect(tolower(l), "clonal sequence")) {
res_format <- "mixcr"
} else if (str_detect(tolower(l), "clonalsequence")) {
res_format <- "mixcr"
} else if (str_detect(tolower(l), "targetsequences")) {
res_format <- "mixcr"
} else if (str_detect(tolower(l), "junction_aa") && str_detect(tolower(l), "cigar")) {
res_format <- "airr"
} else if (str_detect(tolower(l), "raw_clonotype_id") && str_detect(tolower(l), "barcode") && str_detect(tolower(l), "v_gene")) {
res_format <- "10x (filt.contigs)"
} else if (str_detect(tolower(l), "clonotype_id") && str_detect(tolower(l), "v_gene")) {
res_format <- "10x (consensus)"
} else if (str_detect(tolower(l), "clonotype sequence") && str_detect(tolower(l), "v regions")) {
res_format <- "archer"
} else if (str_detect(tolower(l), "exported from immunarch")) {
res_format <- "immunarch"
} else if (str_detect(tolower(l), "clones") && str_detect(tolower(l), "v.name") && str_detect(tolower(l), "proportion")) {
res_format <- "immunarch"
} else if (str_detect(l, "AAseq") && str_detect(l, "Vregion") && str_detect(l, "Frequency")) {
res_format <- "catt"
} else if (str_detect(l, "Number of reads") && str_detect(l, "Amino acid sequence") && str_detect(l, "V gene")) {
res_format <- "rtcr"
} else if (str_detect(l, "seqId") && str_detect(l, "cdrNucSeq") && str_detect(l, "cdrAASeq")) {
res_format <- "imseq"
} else if (str_detect(tolower(l), "freq") && str_detect(tolower(l), "cdr3nt") && str_detect(tolower(l), "v")) {
res_format <- "vdjtools"
} else if (str_detect(tolower(l), "count") && str_detect(tolower(l), "sequence") && str_detect(tolower(l), "d segment")) {
res_format <- "vdjtools"
} else if (!is.na(l2)) {
if (str_trim(l2) == "\"clones\": [") {
res_format <- "vidjil"
}
}
res_format
}
.make_names <- function(.char) {
if (has_no_data(.char)) {
NA
} else {
tolower(.char)
}
}
.which_recomb_type <- function(.name) {
recomb_type <- NA
i <- 1
while (is.na(recomb_type) && i < 100 && !is.na(.name[i])) {
if (any(str_detect(.name[i], c("TCRA", "TRAV", "TCRG", "TRGV", "IGKV", "IGLV")))) {
recomb_type <- "VJ"
} else if (any(str_detect(.name[i], c("TCRB", "TRBV", "TCRD", "TRDV", "IGHV")))) {
recomb_type <- "VDJ"
}
i <- i + 1
}
if (is.na(recomb_type)) {
warning("Can't determine the type of V(D)J recombination. No insertions will be presented in the resulting data table.")
}
recomb_type
}
.get_coltypes <- function(.filename, .nuc.seq, .aa.seq, .count,
.vgenes, .jgenes, .dgenes,
.vend, .jstart, .dstart, .dend,
.vd.insertions, .dj.insertions, .total.insertions,
.skip, .sep, .add = NA) {
table.colnames <- colnames(readr::read_delim(.filename,
col_types = cols(),
delim = .sep,
quote = "",
escape_double = FALSE,
comment = "",
n_max = 1,
trim_ws = TRUE,
skip = .skip
))
swlist <- list(
col_character(), col_character(),
col_integer(),
col_character(), col_character(), col_character(),
col_integer(), col_integer(), col_integer(), col_integer(),
col_integer(), col_integer(), col_integer()
)
names(swlist) <- tolower(c(
.nuc.seq, ifelse(is.na(.aa.seq), "NA", .aa.seq),
.count,
.vgenes, .jgenes, .dgenes,
.vend, .jstart, .dstart, .dend,
.vd.insertions, .dj.insertions, .total.insertions
))
if (!has_no_data(.add)) {
swlist <- c(swlist, rep(list(col_guess()), length(.add)))
names(swlist)[tail(seq_along(swlist), length(.add))] <- .add
}
swlist <- c(swlist, "_")
if (is.na(.aa.seq)) {
swlist <- swlist[-2]
}
col.classes <- list(sapply(tolower(table.colnames), function(x) {
do.call(switch, c(x, swlist))
}, USE.NAMES = FALSE))[[1]]
names(col.classes) <- table.colnames
col.classes
}
.remove.alleles <- function(.data) {
if (has_class(.data, "list")) {
lapply(.data, .remove.alleles)
} else {
.data[[IMMCOL$v]] <- return_segments(.data[[IMMCOL$v]])
.data[[IMMCOL$j]] <- return_segments(.data[[IMMCOL$j]])
.data
}
}
.postprocess <- function(.data, .mode) {
.data[[IMMCOL$cdr3nt]][.data[[IMMCOL$cdr3nt]] == "NONE"] <- NA
logic <- is.na(.data[[IMMCOL$cdr3aa]]) & !is.na(.data[[IMMCOL$cdr3nt]])
if (any(logic)) {
.data[[IMMCOL$cdr3aa]][logic] <- bunch_translate(.data[[IMMCOL$cdr3nt]][logic])
}
logic <- is.na(.data[[IMMCOL$cdr3aa]]) & is.na(.data[[IMMCOL$cdr3nt]])
if (any(logic)) {
warn_msg <- c(" [!] Removed ", sum(logic))
warn_msg <- c(warn_msg, " clonotypes with no nucleotide and amino acid CDR3 sequence.")
message(warn_msg)
}
.data <- .data[!logic, ]
if (nrow(.data)) {
for (colname in c(IMMCOL$ve, IMMCOL$ds, IMMCOL$de, IMMCOL$js, IMMCOL$vnj, IMMCOL$vnd, IMMCOL$dnj)) {
if (colname %in% colnames(.data)) {
logic <- is.na(.data[[colname]])
.data[[colname]][logic] <- -1
logic <- .data[[colname]] < 0
.data[[colname]][logic] <- NA
}
}
for (immcol in c(IMMCOL, IMMCOL_EXT)) {
for (col_i in seq_along(immcol$order)) {
colname <- immcol$order[col_i]
if (colname %in% colnames(.data)) {
if (!has_class(.data[[colname]], immcol$type[col_i])) {
.data[[colname]] <- as(.data[[colname]], immcol$type[col_i])
}
}
}
}
logic <- is.na(.data[[IMMCOL$count]])
if (any(logic)) {
message(" [!] Warning: found NAs in clonal counts. Setting them to 1's.")
.data[[IMMCOL$count]][logic] <- 1
}
.data <- .data[order(.data[[IMMCOL$count]], decreasing = TRUE), ]
} else {
.data <- NULL
}
.data
}
.as_tsv <- function(.delim_file) {
df <- readr::read_tsv(.delim_file, comment = "#")
if (ncol(df) == 1) {
# treat file as non-tab delimited and convert it to temporary tsv
df <- readr::read_delim(.delim_file, comment = "#")
tsv_file <- tempfile()
readr::write_tsv(df, tsv_file)
return(tsv_file)
} else {
return(.delim_file)
}
}
.check_empty_repertoires <- function(.data) {
empty_reps <- .data %>%
sapply(function(repertoire) {
nrow(repertoire) == 0
}) %>%
sum()
if (empty_reps > 0) {
warning("Input data contains ", empty_reps, " empty repertoire(s)!")
}
}
.validate_repertoires_data <- function(.data) {
if (!inherits(.data, "list")) {
stop("Wrong input data format: expected list of immune repertoires!")
} else if (length(.data) == 0) {
stop("Input list of immune repertoires is empty!")
} else if (inherits(.data[[1]], "list")) {
stop(
"Wrong input data format: expected list of immune repertoires, found nested list!\n",
"Maybe, immdata is passed instead of immdata$data."
)
}
.check_empty_repertoires(.data)
}
.validate_immdata <- function(.immdata) {
if (!inherits(.immdata, "list")) {
stop("Input data is not a list; please pass Immunarch dataset object as input.")
} else if (length(.immdata) < 2 |
!("data" %in% names(.immdata)) |
!("meta" %in% names(.immdata))) {
stop(
"Input list must contain \"data\" and \"meta\" elements;\n",
"please pass Immunarch dataset object as input.\n",
"Maybe, immdata$data is passed instead of immdata."
)
} else if (!inherits(.immdata$data, "list") | !is.data.frame(.immdata$meta)) {
stop(
"Wrong input data format: expected list with \"data\" as list ",
"and \"meta\" as dataframe;\n",
"please pass Immunarch dataset object as input."
)
} else if (length(.immdata$data) == 0) {
stop("Input list of immune repertoires in \"data\" is empty!")
} else if (length(.immdata$data) != nrow(.immdata$meta)) {
stop(
"Number of samples is different in data (", length(.immdata$data),
") and metadata (", nrow(.immdata$meta), ")!"
)
}
.check_empty_repertoires(.immdata$data)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.