#' Loxcode experiment object
#'
#' The `loxcode_experiment` object enables handling of data from multiple samples.
#'
#' @slot name string, name of the experiment
#' @slot dir string, directory containing R1, R2 *.fastq files. Must end with '/'
#' @slot samples list, contains loxcode_sample objects that can be accessed by the sample name
#' @slot samp_table data.frame, user-specified table that can be loaded from an Excel spreadsheet
#' @slot count_matrixes , has barcode counts for all samples
#' @slot code_sets , has code sets generated by filtering, by default contains all codes in all samples
loxcode_experiment <- setClass(
"loxcode_experiment",
representation(
name = "character",
dir = "character",
samples = "list",
samp_table = "data.frame",
count_matrixes = "list",
code_sets = "list",
meta = "data.frame",
alias = "list"
),
prototype = list(
name = '',
dir = '',
samples = list(),
samp_table = data.frame(),
count_matrixes = list(),
code_sets = list(),
meta = data.frame(),
alias = list()
)
)
#' CHECK IF IN SHINY
#'
#' @return True if shiny::runApp is round in callstack
#' @export
shiny_running = function () {
# Look for `runApp` call somewhere in the call stack.
frames = sys.frames()
calls = lapply(sys.calls(), `[[`, 1)
call_name = function (call)
if (is.function(call)) '<closure>' else deparse(call)
call_names = vapply(calls, call_name, character(1))
target_call = grep('^runApp$', call_names)
if (length(target_call) == 0)
return(FALSE)
# Found a function called `runApp`, verify that it’s Shiny’s.
target_frame = frames[[target_call]]
namespace_frame = parent.env(target_frame)
isNamespace(namespace_frame) && environmentName(namespace_frame) == 'shiny'
}
#' Load data from FASTQ for all samples in a loxcode_experiment object
#'
#' For each sample in the loxcode_experiment, barcodes readout is performed from FASTQ
#' followed by element imputation for the 13-element codes. Loxcodes are then validated, IDs fetched,
#' and distance-from-origin is retrieved.
#'
#' @param x loxcode_experiment object for which to load samples
#' @param full boolean, whether to produce full debugging output (default is FALSE, this uses significantly more memory)
#' @return loxcode_experiment object with sample data loaded
#' @export
setGeneric("load_samples", function(x, ...) {standardGeneric("load_samples")})
setMethod("load_samples", "loxcode_experiment", function(x, full = FALSE, sat = FALSE){
x@samples <- lapply(names(x@samples), function(z){
print(z)
if (shiny_running()) {
shiny::showNotification(paste("Loading", z, "..."))
}
samp_table_sliced <- x@samp_table[match(z, x@samp_table$sample), ]
files = list.files(x@dir)
files = sort(files[grepl(".fastq$", files)])
indices = grepl(x@samples[[z]], files)
matches = sort(files[indices])
out <- loxcoder::decode(c(paste0(x@dir, matches[1]), paste0(x@dir, matches[2])),
name = z, meta = samp_table_sliced[,!names(samp_table_sliced)%in%c("sample", "prefix", "min_r1_len", "min_r2_len")],
min_r1_len = samp_table_sliced$min_r1_len,
min_r2_len = samp_table_sliced$min_r2_len, full = full, sat = sat)
out <- loxcoder::impute(out)
out <- loxcoder::validate(out)
out <- loxcoder::makeid(out)
out <- loxcoder::get_origin_dist(out)
return(out)
})
names(x@samples) <- x@samp_table$sample
return(x)
})
#' Fetch names of samples
#'
#' The names returned by this function can be used to refer to each individual sample
#' @param x loxcode_experiment object
#' @return names (handles) of samples
#' @export
setGeneric("sampnames", function(x){standardGeneric("sampnames")})
setMethod("sampnames", "loxcode_experiment", function(x){
return(names(x@samples))
})
#' Set names of samples
#'
#' @export
setGeneric("sampnames<-", function(x, v){ standardGeneric("sampnames<-")})
setMethod("sampnames<-", "loxcode_experiment", function(x, v){
if(length(v) != length(unique(v))){
stop("Sample names are not unique")
}
names(x@samples) <- v
})
#' Fetch experiment name
#'
#' @param x loxcode_experiment object
#' @return name of experiment
#' @export
setGeneric("name", function(x){ standardGeneric("name")})
setMethod("name", "loxcode_experiment", function(x){
return(x@name)
})
#' Set experiment name
#'
#' @export
setGeneric("name<-", function(x, v) {standardGeneric("name<-")})
setMethod("name<-", "loxcode_experiment", function(x, v){
x@name <- v
})
#' Get loxcode_sample object by sample name
#'
#' @param x loxcode_experiment object
#' @param s sample name
#' @return loxcode_sample object corresponding to s
#' @export
setGeneric("sample", function(x, s){standardGeneric("sample")})
setMethod("sample", "loxcode_experiment", function(x, s){
return(x@samples[[s]])
})
#' Get unvalidated readout data for a given sample
#'
#' Equivalent to performing loxcoder::data() on the sample s
#' @param x loxcode_experiment object
#' @param s sample name
#' @return data.frame containing unvalidated readout data for sample s
#' @export
setGeneric("get_data", function(x, s){standardGeneric("get_data")})
setMethod("get_data", "loxcode_experiment", function(x, s){
return(loxcoder::data(x@samples[[s]]))
})
#' Get validated readout data for a given sample
#'
#' Equivalent to performing loxcoder::valid() on the sample s
#' @param x loxcode_experiment object
#' @param s sample name
#' @export
setGeneric("get_valid", function(x, s){standardGeneric("get_valid")})
setMethod("get_valid", "loxcode_experiment",function(x, s){
return(loxcoder::valid(x@samples[[s]]))
})
#' Get sample table
#'
#' @export
setGeneric("samptable", function(x){ standardGeneric("samptable")})
setMethod("samptable", "loxcode_experiment", function(x){
return(x@samp_table)
})
#' Set sample table
#'
#' @export
setGeneric("samptable<-", function(x, value){ standardGeneric("samptable<-")})
setMethod("samptable<-", "loxcode_experiment", function(x, value){
x@samp_table <- value
return(x)
})
#' Create experiment
#'
#' Create a loxcode_experiment from xlsx sample table.
#'
#' Must contain the following columns:
#'
#' * `sample` -- sample name. This is used as the unique identifier for the sample
#'
#' * `prefix` -- sample prefix. suffix_R1 and suffix_R2 are appended to this
#'
#' * `meta` -- sample metadata. This is user specified
#'
#' * `min_r1_len` -- minimum R1 length
#'
#' * `min_r2_len` -- minimum R2 length
#'
#' @param name string, name of the experiment
#' @param s string, path to user-specified Excel file containing experiment samples.
#' @param dir string, path to directory containing fastq files
#' @param load boolean, whether to load samples or not (default is TRUE)
#' @param full boolean, whether to produce full debugging output (default is FALSE, this uses significantly more memory)
#' @param sat boolean, whether to keep saturation info
#' @return loxcode_experiment object
#' @export
load_from_xlsx <- function(name, s, dir, load = TRUE, full = FALSE, sat = FALSE){
x <- new("loxcode_experiment", name = name, dir = dir)
x@samp_table <- data.frame(read_excel(s))
samp_table = x@samp_table
x@samples = lapply(x@samp_table$prefix, function(z) z)
names(x@samples) <- x@samp_table$sample
if(load){
x <- loxcoder::load_samples(x, full = full, sat = sat);
x <- loxcoder::setup_count_matrix_codesets(x);
x <- loxcoder::setup_metadata(x);
x <- loxcoder::fill_alias(x);
}
return(x)
}
# Same as load_from_xlsx but now the parameters
# s, dir, suffix_R1, suffix_R2
# are lists
load_from_xlsx_multi <- function(name, s, dir, suffix_R1, suffix_R2, load = TRUE, full = FALSE, sat = FALSE){
x <- new("loxcode_experiment", name = name, dir = dir)
return(x)
}
#' Merge two loxcode_sample objects
#'
#' Barcodes from s1 and s2 are merged. For barcodes present in both samples, read counts are
#' summed. Sample descriptors and other members are concatenated in order.
#' @param s1 sample 1
#' @param s2 sample 2
#' @return loxcode_sample object corresponding to merged sample
#' @export
merge_sample <- function(s1, s2){
m <- merge(s1@decode@data, s2@decode@data, by = c('code', 'size', 'is_valid', 'id', 'dist_orig'), all = T)
m$count.x[is.na(m$count.x)] <- 0
m$count.y[is.na(m$count.y)] <- 0
m$count <- m$count.x + m$count.y
m <- m[, !(names(m) %in% c('count.x', 'count.y'))]
m <- m[order(m$code), ]
m <- m[, c(ncol(m), 1:(ncol(m)-1))]
s <- new('loxcode_sample')
s@decode <- new('decode_output')
s@decode@data <- m
# for the rest of the members, we just duplicate
s@decode@saturation <- list(s1@decode@saturation, s2@decode@saturation)
s@decode@read_ids <- list(s1@decode@read_ids, s2@decode@read_ids)
s@meta <- cbind(s1@meta, s2@meta)
s@name <- paste(s1@name, s2@name, sep = '_')
s@files <- cbind(s1@files, s2@files)
s@consensus_filtered_data <- list(s1@consensus_filtered_data,
s2@consensus_filtered_data)
s@decode_stats <- list(s1@decode_stats,
s2@decode_stats)
return(s)
}
#' Merge samples by label
#'
#' Merge samples present in a loxcode_experiment object according to a specified label column in
#' samp_table. All samples with a common value for the label column are merged together. The resulting
#' loxcode_experiment object contains loxcode_samples named by the corresponding labels in by.
#'
#' @param x loxcode_experiment object
#' @param by name of column in samp_table to merge by
#' @return loxcode_experiment object containing merged samples
#' @export
setGeneric('merge_by', function(x, by){ standardGeneric('merge_by') })
setMethod('merge_by', 'loxcode_experiment', function(x, by){
vals <- unique(x@samp_table[, by])
x_merged <- new("loxcode_experiment", name = paste(x@name, 'merge_by', by, sep = '_'),
dir = x@dir)
x_merged@samp_table <- x@samp_table
x_merged@samples <- lapply(vals, function(z){
print(z)
index <- which(x@samp_table[, by] == z)
if(length(index) > 0){
out <- x@samples[[index[1]]]
for(i in 2:length(index)){
out <- merge_sample(out, x@samples[[index[i]]])
}
return(out)
}else{
return(NA)
}
})
names(x_merged@samples) <- vals
return(x_merged)
})
#' Generate count_matrix from individual samples and add standard code_sets
#' @export
setGeneric("setup_count_matrix_codesets", function(x, ...) {standardGeneric("setup_count_matrix_codesets")})
setMethod("setup_count_matrix_codesets", "loxcode_experiment", function(x){
m=data.frame(code="")
for(i in x@samp_table$sample){
m=merge(m,loxcoder::sample(x, i)@decode@data, by="code", all=TRUE)
}
row.names(m)=m$code
m=m[-1,-1]
mm=m[,grepl("count",names(m))];
mm[is.na(mm)]=0;
names(mm)=x@samp_table$sample;
ms=m[,grepl("size",names(m))];
mv=m[,grepl("valid",names(m))];
mo=m[,grepl("orig",names(m))];
#define some standard code subsets
all.codes=data.frame(code=row.names(mm), size=rowMeans(ms,na.rm=TRUE), is_valid=rowMeans(mv,na.rm=TRUE), dist_orig=rowMeans(mo,na.rm=TRUE),stringsAsFactors = FALSE)
valid.codes=subset(all.codes,is_valid==1)
valid.codes$flp_dist<- loxcoder::min_flip_dist(valid.codes$code, valid.codes$size, valid.codes$is_valid)$d_min
invalid.codes=subset(all.codes,is_valid==0);
x@count_matrixes[["all_samples"]]=mm
x@code_sets[["all_codes"]]=plyr::rbind.fill(valid.codes,invalid.codes);
x@code_sets[["valid_codes"]]=valid.codes;
x@code_sets[["invalid_codes"]]=invalid.codes;
return(x)
})
#' Generate metadata for the loxcode experiment
#' @export
setGeneric("setup_metadata", function(x, ...) {standardGeneric("setup_metadata")})
setMethod("setup_metadata", "loxcode_experiment", function(x){
for (i in 1:length(x@samples)){
row = data.frame(sample_name = name(x@samples[[i]]), stringsAsFactors = FALSE)
row <- merge(row, x@samples[[i]]@meta)
x@meta = plyr::rbind.fill(x@meta, row)
}
return(x)
})
#' Generate alias for the loxcode experiment samples
#' @export
setGeneric("fill_alias", function(x, ...) {sstandardGeneric("fill_alias")})
setMethod("fill_alias", "loxcode_experiment", function(x){
alias = data.frame()
for (i in 1:length(x@samples)){
row = data.frame(sample_name=name(x@samples[[i]]), alias=paste("Sample", i), stringsAsFactors = FALSE)
alias = plyr::rbind.fill(alias, row)
}
x@alias[["all_samples"]] = alias
return(x)
})
#' selects subset of samples
#' @param count_matrix
#' @export
#setGeneric("select_samples", function(count_matrix="all_samples",select) {standardGeneric("select_samples")})
#setMethod("select_samples", function(count_matrix="all_samples",select){
#stringr::str_split_fixed(names(D@samples),"_",4)[,3]
#})
#' Creates a new codeset with dataframe of codesets
#'
#' @param x loxcode_experiment object
#' @param c name of existing codeset to choose from
#' @param I indices to include
#' @param n name of the new codeset
#' @return new loxcode_experiment object with a new codeset
#' @export
setGeneric("make_codeset_index", function(x, c, I, n) {standardGeneric("make_codeset_index")})
setMethod("make_codeset_index", "loxcode_experiment", function(x, c, I, n){
new_experiment <- x
newset=data.frame()
for (i in I){
row = x@code_sets[[c]][i,]
newset = plyr::rbind.fill(newset, row)
}
new_experiment@code_sets[[n]] = newset
return(new_experiment)
})
#' Deletes a specified codeset from a loxcode experiment
#'
#' @param x loxcode_experiment object
#' @param n name of the codeset to be deleted
#' @return new loxcode_experiment object with a new codeset
#' @export
setGeneric("delete_codeset", function(x, n) {standardGeneric("delete_codeset")})
setMethod("delete_codeset", "loxcode_experiment", function(x, n){
# always retain all_codes, valid_codes and invalid_codes
if(n=="all_codes" | n=="valid_codes" | n=="invalid_codes"){
return(x)
} else{
new_experiment <- x
new_experiment@code_sets[[n]] <- NULL
return(new_experiment)
}
})
#' Creates a new count_matrix with dataframe of samples
#'
#' @param x loxcode_experiment object
#' @param c name of existing count_matrix to choose from
#' @param I indices to include
#' @param n name of the new count_matrix
#' @return new loxcode_experiment object with a new count_matrix
#' @export
setGeneric("make_count_matrix", function(x, c, I, n) {standardGeneric("make_count_matrix")})
setMethod("make_count_matrix", "loxcode_experiment", function(x, c, I, n) {
new_experiment <- x
new_matrix = x@count_matrixes[[c]]
new_matrix = new_matrix[I]
new_experiment@count_matrixes[[n]] = new_matrix
alias = data.frame()
for (i in 1:ncol(new_matrix)){
row = data.frame(sample_name=names(new_matrix)[[i]], alias=paste("Sample", i), stringsAsFactors = FALSE)
alias = plyr::rbind.fill(alias, row)
}
new_experiment@alias[[n]] = alias
return(new_experiment)
})
#' Creates a new count_matrix with dataframe of samples
#'
#' @param x loxcode_experiment object
#' @param c name of existing count_matrix to be deleted
#' @return updated loxcode_experiment object
#' @export
setGeneric("delete_count_matrix", function(x, c) {standardGeneric("delete_count_matrix")})
setMethod("delete_count_matrix", "loxcode_experiment", function(x, c) {
if (c=="all_samples"){
return(x)
} else {
new_experiment <- x
new_experiment@count_matrixes[[c]] <- NULL
return (new_experiment)
}
})
#' Renames a sample in a loxcode_experiment
#'
#' @param x loxcode_experiment object
#' @param c count_matrix name
#' @param n new sample name
#' @param o old sample name
#' @return new loxcode_experiment object with updated sample name
#' @export
setGeneric("rename_sample", function(x, c, n, o) {standardGeneric("rename_sample")})
setMethod("rename_sample", "loxcode_experiment", function(x, c, n, o) {
# count matrixes
temp <- x@count_matrixes[[c]]
names(temp)[names(temp) == o] <- n
x@count_matrixes[[c]] <- temp
# name
x@samples[[o]]@name = n
# samples
temp <- x@samples[[o]]
x@samples[[o]] <- NULL
x@samples[[n]] <- temp
# meta
temp <- x@meta
temp[temp==o] <- n
x@meta <- temp
# samp table
temp <- x@samp_table
temp[temp==o] <- n
x@samp_table <- temp
return(x)
})
#' Changes the alias of a sample
#'
#' @param x loxcode_experiment object
#' @param c current count_matrix
#' @param s sample name
#' @param n new alias
#' @return loxcode_experiment object with changed sample alias
#' @export
setGeneric("new_alias", function(x, c, s, n) {standardGeneric("new_alias")})
setMethod("new_alias", "loxcode_experiment", function(x, c, s, n) {
alias = x@alias[[c]]
index = match(s, alias$sample_name)
alias[index, 2] = n
x@alias[[c]] = alias
return(x)
})
#' Collapses the count matrix into samples with the sample metadata
#'
#' @param x loxcode_experiment object
#' @param count_matrix current count_matrix
#' @param collapse column names of metadata on which to collapse
#' @param name name of new count_matrix
#' @param union boolean, True if barcodes in either should be counted
#' @param average boolean, True if counts should be averaged instead of summed
#' @return loxcode_experiment object with new count_matrix
#' @export
setGeneric("collapse", function(x, count_matrix, collapse, name, union=TRUE, average=FALSE) {standardGeneric("collapse")})
setMethod("collapse", "loxcode_experiment", function(x, count_matrix, collapse, name, union=TRUE, average=FALSE) {
M = x@count_matrixes[[count_matrix]]
S = get_collapsed_meta(x, count_matrix)
S = cbind(sample_name = 0, S)
S$sample_name = row.names(S)
row.names(S) = seq(1, nrow(S))
U = as.data.frame(unique(S[,collapse]), stringsAsFactors = FALSE); # get unique combinations
U = na.omit(U)
MM = data.frame(row.names=row.names(M), stringsAsFactors = FALSE) #new data.frame to store results
n=array()
index = c()
for (c in collapse){
index = append(index, match(c, names(S)))
}
#loop over all unique combinations
for(i in 1:nrow(U)) {
filter=array();
for(j in 1:nrow(S)) {
filter[j]=all(S[j,index]==U[i,])
}
filter[is.na(filter)]=FALSE
if(sum(filter==TRUE)==0) next;
cols = sum(filter==TRUE)
# bardcode in any sample
if(cols>1 & union){
rsums = rowSums(M[,filter])
if (average){
MM=cbind(MM,(rsums/cols))
}
else {
MM=cbind(MM, rsums)
}
}
# barcode in all samples
else if(sum(filter==TRUE)>1 & !union) {
rsums = rowSums(M[,filter])
if (average) {
MM=cbind(MM,(apply(M[,filter]>0,1,all)*rsums/cols))
}
else {
MM=cbind(MM,(apply(M[,filter]>0,1,all)*rsums))
}
}
else {MM=cbind(MM,M[,filter]);} #or if only one sample
n[i]=paste(U[i,],collapse="__")
}
names(MM)=n;
is.num <- sapply(MM, is.numeric)
MM[is.num] <- lapply(MM[is.num], round, 2)
x@count_matrixes[[name]] <- MM
alias = data.frame()
for (i in 1:ncol(MM)){
row = data.frame(sample_name=names(MM)[[i]], alias=paste("Sample", i), stringsAsFactors = FALSE)
alias = plyr::rbind.fill(alias, row)
}
x@alias[[name]] = alias
return(x)
})
#' Collapses the count matrix into samples with the sample metadata (v2)
#'
#' @param lox loxcode_experiment object
#' @param count_matrix current count_matrix
#' @param collapse column names of metadata on which to collapse
#' @param name name of new count_matrix
#' @param union boolean, True if barcodes in either should be counted
#' @param average boolean, True if counts should be averaged instead of summed
#' @return loxcode_experiment object with new collapsed samples
#' @export
setGeneric("collapse2", function(lox, count_matrix, collapse, name, union=TRUE, average=FALSE) {standardGeneric("collapse2")})
setMethod("collapse2", "loxcode_experiment", function(lox, count_matrix, collapse, name, union=TRUE, average=FALSE) {
counts = lox@count_matrixes[[count_matrix]]
meta = subset(lox@meta, sample_name %in% names(counts))
params = match(collapse, names(meta))
# group samples based on which share metadata
track = list()
for (i in 1:nrow(meta)) {
sample = meta$sample_name[i]
metadata = paste0(meta[i, params], collapse = "_")
if (metadata %in% names(track)) track[[metadata]] = c(track[[metadata]], sample)
else track[[metadata]] = c(sample)
}
# merge samples that share the same meta data
new_samples = lapply(track, function(samples) merge_sample_list(lox, samples, union, average))
for (i in 1:length(new_samples)) {
sample_name = names(track)[i]
new_samples[[i]]@name = sample_name
metadata = cbind(data.frame(sample_name = sample_name), new_samples[[i]]@meta)
lox@meta = rbind.fill(lox@meta, metadata)
}
lox@samples = c(lox@samples, new_samples)
# modify count_matrixes
codes = lox@code_sets[["all_codes"]]$code
count_matrix = lox@count_matrixes[["all_samples"]]
new_count_matrix = data.frame(matrix(nrow = length(codes), ncol = length(new_samples)))
row.names(new_count_matrix) = codes
names(new_count_matrix) = names(track)
for (i in 1:length(track)) {
if (length(track[[i]]) == 1) new_count_matrix[[i]] = count_matrix[[track[[i]][1]]]
else new_count_matrix[[i]] = rowSums(count_matrix[ , track[[i]]])
}
lox@count_matrixes[[name]] = new_count_matrix
# fill aliases
alias = data.frame()
for (i in 1:ncol(new_count_matrix)){
row = data.frame(sample_name=names(new_count_matrix)[[i]], alias=paste("Sample", i), stringsAsFactors = FALSE)
alias = plyr::rbind.fill(alias, row)
}
lox@alias[[name]] = alias
return(lox)
})
#' Merge list of samples
#' Merges a list of samples into one loxcode_sample object
#'
#' @param lox loxcode_experiment object
#' @param samples list of sample names to be merged
#' @param union True if barcodes in either samples should be counted
#' @param average True if counts should be averaged instead of summed
#' @return merged loxcode_sample object
#' @export
setGeneric("merge_sample_list", function(lox, samples, union=TRUE, average=FALSE) {standardGeneric("merge_sample_list")})
setMethod("merge_sample_list", "loxcode_experiment", function(lox, samples, union=TRUE, average=FALSE) {
# initialize and declare variables
new = new("loxcode_sample")
FIXED_PARAMS = c("code", "size", "is_valid", "id", "dist_orig")
samples_list = lox@samples[samples]
# fill decode@data slot
if (length(samples_list) == 1) {
merged_data = samples_list[[1]]@decode@data
counts = merged_data$count
firstreads = merged_data$firstread
}
else {
data_list = c()
data_list = lapply(samples_list, function(x) c(data_list, x@decode@data))
merged_data = Reduce(function(x, y) merge(x, y, all = union, by = FIXED_PARAMS), data_list)
counts = rowSums(merged_data[ ,grepl("count", names(merged_data))], na.rm = TRUE)
if (average) {counts = counts / length(samples)}
firstreads = rowSums(merged_data[,grepl("firstread", names(merged_data))], na.rm = TRUE) / length(samples)
}
new@decode@data = data.frame(count = counts,
firstread = firstreads,
code = merged_data$code,
size = merged_data$size,
is_valid = merged_data$is_valid,
id = merged_data$id,
dist_orig = merged_data$dist_orig)
new@decode@saturation = integer(0)
new@decode@read_ids = list()
# fill in the metadata slot
meta_list = c()
meta_list = lapply(samples_list, function(x) c(meta_list, x@meta))
meta = Reduce(function(x, y) merge(x, y, all = TRUE), meta_list)
if (length(samples_list) == 1) new@meta = data.frame(meta)
else new@meta = meta[lapply(meta, function(x) length(unique(x))) == 1][1, ]
# files slot
new@files = list()
new@files = lapply(samples_list, function(x) list.append(new@files, x@files))
# decode stats slot
for (stat in names(samples_list[[1]]@decode_stats)) {
values = lapply(samples_list, function(x) x@decode_stats[[stat]])
new@decode_stats[[stat]] = sum(unlist(values))
}
return (new)
})
#' Get metadata of a merged experiment (old version)
#'
#' @param x loxcode_experiment object
#' @param s set of merged samples
#' @return a data frame of meta data
#' @export
setGeneric("get_collapsed_meta2", function(x, s) {standardGeneric("get_collapsed_meta2")})
setMethod("get_collapsed_meta2", "loxcode_experiment", function(x, s) {
counts = x@count_matrixes[[s]]
sample_names = names(counts)
# Divide samples into collapsed vs non-collapsed
non_col_names = intersect(names(x@samples), names(counts))
col_names = setdiff(sample_names, non_col_names)
# metadata of collapsed samples
df1 = data.frame(stringsAsFactors = FALSE)
if (length(col_names) > 0) {
col_meta = strsplit(col_names, split = "__")
for (i in 1:length(col_meta)) {
row = data.frame("sample_name" = col_names[[i]], t(col_meta[[i]]), stringsAsFactors = FALSE)
df1 = plyr::rbind.fill(df1, row)
}
used = c()
for (i in 2:ncol(df1)) {
for (j in 1:ncol(x@meta)) {
if (!names(x@meta)[[j]] %in% used & all(df1[[i]] %in% x@meta[[j]])) {
names(df1)[[i]] <- names(x@meta)[[j]]
used = c(used, names(x@meta)[[j]])
break
}
}
}
}
# metadata of non_collapsed samples
df2 = subset(x@meta, sample_name %in% non_col_names)
library(plyr)
df = rbind.fill(df2, df1)
row.names(df) = df$sample_name
df$sample_name = NULL
return(df)
})
#' Get metadata of a merged experiment
#'
#' @param x loxcode_experiment object
#' @param s set of merged samples
#' @return a data frame of meta data
#' @export
setGeneric("get_collapsed_meta", function(x, s) {standardGeneric("get_collapsed_meta")})
setMethod("get_collapsed_meta", "loxcode_experiment", function(x, s) {
counts = x@count_matrixes[[s]]
sample_names = names(counts)
# Divide samples into collapsed vs non-collapsed
non_col_names = intersect(names(x@samples), names(counts))
col_names = setdiff(sample_names, non_col_names)
# metadata of collapsed samples
meta = sapply(x@meta, unique)
df1 = data.frame(matrix(ncol = length(meta), nrow = length(col_names)))
names(df1) = names(meta)
df1$sample_name = col_names
if (length(col_names)) {
for (i in 1:length(col_names)) {
components = unlist(strsplit(col_names[i], split = "__"))
if (length(components)==1 & grepl("NA", components[[1]])) {
# no metadata to insert
break
}
else {
used_cols = c()
for (metadata in components) {
possible_cols = names(meta)[sapply(meta, function(x) metadata %in% x)]
possible_cols = setdiff(possible_cols, intersect(possible_cols, used_cols))
df1[i, possible_cols[[1]]] = metadata
used_cols = c(used_cols, possible_cols[1])
}
}
}
}
# metadata of non_collapsed samples
df2 = subset(x@meta, sample_name %in% non_col_names)
library(plyr)
df = rbind.fill(df2, df1)
df = df[!is.na(df$sample_name), ]
row.names(df) = df$sample_name
df$sample_name = NULL
return(df)
})
#' Create new filtered code set according to defined parameters
#'
#' @param x loxcode_experiment object
#' @param s independent sample set
#' @param c code set
#' @param t tolerance threshold for repeated proportions
#' @param m maximum repeats tolerated
#' @param n new code set name
#' @return new loxcode_experiment object
#' @export
setGeneric("make_filtered_codeset", function(x, s, c, t, m, n) {standardGeneric("make_filtered_codeset")})
setMethod("make_filtered_codeset", "loxcode_experiment", function(x, s, c, t, m, n) {
codeset = x@code_sets[[c]]
YY = filtered_codes_table(x, s, c, t, m)
x@code_sets[[n]] = subset(codeset,paste(codeset$size,codeset$dist_orig)%in%paste(YY$size,YY$dist_orig))
return (x)
})
#' rename code set
#'
#' @param x loxcode_experiment object
#' @param c code set
#' @param n new code set name
#' @return new loxcode_experiment object
#' @export
setGeneric("rename_codeset", function(x, c, n) {standardGeneric("rename_codeset")})
setMethod("rename_codeset", "loxcode_experiment", function(x, c, n) {
if (c %in% c("all_codes", "invalid_codes", "valid_codes")) {
return(x)
}
temp = x@code_sets[[c]]
x@code_sets[[c]] <- NULL
x@code_sets[[n]] = temp
return(x)
})
#' rename sample set
#'
#' @param x loxcode_experiment object
#' @param s sample set
#' @param n new code set name
#' @return new loxcode_experiment object
#' @export
setGeneric("rename_sampleset", function(x, s, n) {standardGeneric("rename_sampleset")})
setMethod("rename_sampleset", "loxcode_experiment", function(x, s, n) {
if (s %in% c("all_samples")) {
return(x)
}
temp = x@count_matrixes[[s]]
x@count_matrixes[[s]] <- NULL
x@count_matrixes[[n]] = temp
temp = x@alias[[s]]
x@alias[[s]] <- NULL
x@alias[[n]] = temp
return(x)
})
#' generate sample aliases
#'
#' @param lox loxcode_experiment object
#' @param s sample set
#' @param meta columns of meta data on which to collapse
#' @return loxcode_experiment object with aliases generated
#' @export
setGeneric("generate_alias", function(lox, s="all_samples", meta) {standardGeneric("generate_alias")})
setMethod("generate_alias", "loxcode_experiment", function(lox, s="all_samples", meta) {
x = lox
aliases = x@alias[[s]]
metadata = get_collapsed_meta(lox, s)
metadata$sample_name = row.names(metadata)
row.names(metadata) = seq(1, nrow(metadata))
for (i in 1:nrow(aliases)) {
sample = aliases$sample_name[[i]]
index = match(sample, metadata$sample_name)
row = metadata[index,]
alias = paste(row[meta], collapse="_")
aliases$alias[[i]] = alias
}
counts = list()
length(counts) = length(unique(aliases$alias))
names(counts) = unique(aliases$alias)
for (i in 1:nrow(aliases)) {
alias = aliases$alias[[i]]
if (is.numeric(counts[[alias]])) {
counts[[alias]] = counts[[alias]] + 1
} else {
counts[[alias]] = 1
}
alias = paste(alias, as.character(counts[[alias]]), sep="_")
aliases$alias[[i]] = alias
}
x@alias[[s]] = aliases
return(x)
})
#' Merge two experiments
#'
#' @param lox1 loxcode_experiment 1
#' @param lox2 loxcode_experiment 2
#' @param name experiment name
#' @return new merged loxcode_experiment
#' @export
setGeneric("merge_experiments", function(lox1, lox2, name=NULL) {standardGeneric("merge_experiments")})
setMethod("merge_experiments", "loxcode_experiment", function(lox1, lox2, name=NULL) {
new = new("loxcode_experiment")
duplicates = c()
rename1 = c()
rename2 = c()
# name slot
if (is.null(name)) {
if (lox1@name == lox2@name) {
lox1@name = paste0(lox1@name, "_1")
lox2@name = paste0(lox2@name, "_2")
}
new@name = paste(lox1@name, lox2@name, collapse="+")
} else {
new@name = name
}
# directory slot
new@dir = c(lox1@dir, lox2@dir)
# samples slot
for (i in 1:length(lox1@samples)) {
lox1@samples[[i]]@meta$experiment = lox1@name
}
for (i in 1:length(lox2@samples)) {
lox2@samples[[i]]@meta$experiment = lox2@name
# find duplicates
if (lox2@samples[[i]]@name %in% names(lox1@samples)) {
duplicates = c(duplicates, lox2@samples[[i]]@name)
rename1 = c(rename1, paste0(lox2@samples[[i]]@name, "_1"))
rename2 = c(rename2, paste0(lox2@samples[[i]]@name, "_2"))
}
}
names(rename1) <- duplicates
names(rename2) <- duplicates
library(plyr)
names(lox1@samples) = revalue(names(lox1@samples), rename1, warn_missing = FALSE)
names(lox2@samples) = revalue(names(lox2@samples), rename2, warn_missing = FALSE)
new@samples = c(lox1@samples, lox2@samples)
for (i in 1:length(new@samples)) {
new@samples[[i]]@name = names(new@samples)[i]
}
# samp_table slot
library(plyr)
lox1@samp_table$experiment = lox1@name
lox2@samp_table$experiment = lox2@name
lox1@samp_table$sample = revalue(lox1@samp_table$sample, rename1, warn_missing = FALSE)
lox2@samp_table$sample = revalue(lox2@samp_table$sample, rename2, warn_missing = FALSE)
new@samp_table = plyr::rbind.fill(lox1@samp_table, lox2@samp_table)
# count_matrixes slot
for (i in 1:length(lox1@count_matrixes)) {
names(lox1@count_matrixes[[i]]) = revalue(names(lox1@count_matrixes[[i]]), rename1, warn_missing = FALSE)
}
for (i in 1:length(lox2@count_matrixes)) {
names(lox2@count_matrixes[[i]]) = revalue(names(lox2@count_matrixes[[i]]), rename2, warn_missing = FALSE)
}
all1 = lox1@count_matrixes[["all_samples"]]
all2 = lox2@count_matrixes[["all_samples"]]
all_samples = merge(all1, all2, by = 0, all=TRUE)
all_samples[is.na(all_samples)] <- 0
row.names(all_samples) = all_samples$Row.names
all_samples$Row.names = NULL
counts1 = lox1@count_matrixes
counts2 = lox2@count_matrixes
counts1[["all_samples"]] <- NULL
counts2[["all_samples"]] <- NULL
counts1[[paste0("all_samples_", lox1@name)]] = all1
counts2[[paste0("all_samples_", lox2@name)]] = all2
new@count_matrixes[["all_samples"]] = all_samples
for (c in names(counts1)) {
new@count_matrixes[[c]] = counts1[[c]]
}
for (c in names(counts2)) {
new@count_matrixes[[c]] = counts2[[c]]
}
# code_sets slot
codes1 = lox1@code_sets
codes2 = lox2@code_sets
new@code_sets[["all_codes"]] = unique(rbind.fill(codes1[["all_codes"]], codes2[["all_codes"]]))
new@code_sets[[paste0("all_codes_", lox1@name)]] = codes1[["all_codes"]]
new@code_sets[[paste0("all_codes_", lox2@name)]] = codes2[["all_codes"]]
new@code_sets[["valid_codes"]] = unique(rbind.fill(codes1[["valid_codes"]], codes2[["valid_codes"]]))
new@code_sets[[paste0("valid_codes_", lox1@name)]] = codes1[["valid_codes"]]
new@code_sets[[paste0("valid_codes_", lox2@name)]] = codes2[["valid_codes"]]
new@code_sets[["invalid_codes"]] = unique(rbind.fill(codes1[["invalid_codes"]], codes2[["invalid_codes"]]))
new@code_sets[[paste0("invalid_codes_", lox1@name)]] = codes1[["invalid_codes"]]
new@code_sets[[paste0("invalid_codes_", lox2@name)]] = codes2[["invalid_codes"]]
others1 = codes1[!names(codes1) %in% c("all_codes", "valid_codes", "invalid_codes")]
others2 = codes2[!names(codes2) %in% c("all_codes", "valid_codes", "invalid_codes")]
for (c in names(others1)) {
new@code_sets[[c]] = others1[[c]]
}
for (c in names(others2)) {
new@code_sets[[c]] = others2[[c]]
}
# meta slot
lox1@meta$sample_name = revalue(lox1@meta$sample_name, rename1, warn_missing = FALSE)
lox2@meta$sample_name = revalue(lox2@meta$sample_name, rename2, warn_missing = FALSE)
lox1@meta$experiment = lox1@name
lox2@meta$experiment = lox2@name
new@meta = plyr::rbind.fill(lox1@meta, lox2@meta)
# alias slot
for (i in 1:length(lox1@alias)) {
lox1@alias[[i]]$sample_name = revalue(lox1@alias[[i]]$sample_name, rename1, warn_missing = FALSE)
}
for (i in 1:length(lox2@alias)) {
lox2@alias[[i]]$sample_name = revalue(lox2@alias[[i]]$sample_name, rename2, warn_missing = FALSE)
}
all_aliases = data.frame(sample_name = names(all_samples)[!names(all_samples) == "Row.names"],
stringsAsFactors = FALSE)
all_aliases$alias = paste0("Sample ", seq(1, nrow(all_aliases)))
new@alias[["all_samples"]] = all_aliases
aliases1 = lox1@alias
aliases2 = lox2@alias
all1 = aliases1[["all_samples"]]
all2 = aliases2[["all_samples"]]
aliases1[["all_samples"]] <- NULL
aliases2[["all_samples"]] <- NULL
aliases1[[paste0("all_samples_", lox1@name)]] = all1
aliases2[[paste0("all_samples_", lox2@name)]] = all2
for (c in names(aliases1)) {
new@alias[[c]] = aliases1[[c]]
}
for (c in names(aliases2)) {
new@alias[[c]] = aliases2[[c]]
}
return(new)
})
#' Merge a vector of experiments
#'
#' @param experiments vector of loxcode_experiment objects
#' @param name name of new loxcode_experiment object
#' @return loxcode_experiment object
#'
#' @export
merge_experiments2 <- function(experiments, name) {
new = new("loxcode_experiment")
new@name = name
for (exp in experiments) {
new@dir = c(new@dir, exp@dir)
}
return (new)
}
#' Collapse selected samples
#'
#' @param lox loxcode_experiment object
#' @param s sample set/ count_matrix
#' @param c code set
#' @param i indices of samples to collapse
#' @param n new sample name
#' @param union boolean: use union or intersection of samples
#' @param average boolean: use average of sum of counts
#' @export
setGeneric("collapse_selection", function(lox, s="all_samples", c="all_codes", i, n=NULL, union=TRUE, average=FALSE) {standardGeneric("collapse_selection")})
setMethod("collapse_selection", "loxcode_experiment", function(lox, s="all_samples", c="all_codes", i, n=NULL, union=TRUE, average=FALSE) {
counts = lox@count_matrixes[[s]]
counts = subset(counts, row.names(counts) %in% lox@code_sets[[c]]$code)
meta = get_collapsed_meta(lox, s)[i, ]
sample_name = c()
for (k in 1:ncol(meta)) {
if (sum(is.na(meta[, k])) == 0 & dim(table(meta[, k])) == 1) {
sample_name = c(sample_name, meta[1, k])
}
}
sample_name = paste0(sample_name, collapse = "__")
if (sample_name == "") {
sample_name = paste0("NA", sum(grepl("NA", names(counts))))
}
cols = data.frame(row.names = seq(1:nrow(counts)))
cols[[sample_name]] = NA
for(k in 1:nrow(counts)) {
if (union==TRUE & average==TRUE) {
cols[k, 1] = sum(counts[k, i]) / length(i)
}
if (union==TRUE & average==FALSE) {
cols[k, 1] = sum(counts[k, i])
}
if (union==FALSE & average==TRUE) {
cols[k, 1] = sum(counts[k, i]) * as.numeric(sum(counts[k, i]==0)==0) / length(i)
}
if (union==FALSE & average==FALSE) {
cols[k, 1] = sum(counts[k, i]) * as.numeric(sum(counts[k, i]==0)==0)
}
}
counts = cbind(counts, cols)
counts[, i] = NULL
lox@count_matrixes[[s]] = counts
# update the aliases
aliases = data.frame("sample_name" = names(counts), "alias" = NA)
aliases$alias = paste0("Sample ", seq(1:nrow(aliases)))
lox@alias[[s]] = aliases
return (lox)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.