R/loxcode_experiment.R

Defines functions load_from_xlsx load_from_xlsx_multi merge_sample merge_experiments2

Documented in load_from_xlsx merge_sample

#' Loxcode experiment object
#'
#' The `loxcode_experiment` object enables handling of data from multiple samples.
#'
#' @slot name string, name of the experiment
#' @slot dir string, directory containing R1, R2 *.fastq files. Must end with '/'
#' @slot samples list, contains loxcode_sample objects that can be accessed by the sample name
#' @slot samp_table data.frame, user-specified table that can be loaded from an Excel spreadsheet
#' @slot count_matrixes , has barcode counts for all samples
#' @slot code_sets , has code sets generated by filtering, by default contains all codes in all samples
loxcode_experiment <- setClass(
  "loxcode_experiment",

  representation(
    name = "character",
    dir = "character",
    samples = "list",
    samp_table = "data.frame",
    count_matrixes = "list",
    code_sets = "list",
    meta = "data.frame",
    alias = "list"
  ),

  prototype = list(
    name = '',
    dir = '',
    samples = list(),
    samp_table = data.frame(),
    count_matrixes = list(),
    code_sets = list(),
    meta = data.frame(),
    alias = list()
  )
)

#' CHECK IF IN SHINY
#'
#' @return True if shiny::runApp is round in callstack
#' @export
shiny_running = function () {
  # Look for `runApp` call somewhere in the call stack.
  frames = sys.frames()
  calls = lapply(sys.calls(), `[[`, 1)
  call_name = function (call)
    if (is.function(call)) '<closure>' else deparse(call)
  call_names = vapply(calls, call_name, character(1))

  target_call = grep('^runApp$', call_names)

  if (length(target_call) == 0)
    return(FALSE)

  # Found a function called `runApp`, verify that it’s Shiny’s.
  target_frame = frames[[target_call]]
  namespace_frame = parent.env(target_frame)
  isNamespace(namespace_frame) && environmentName(namespace_frame) == 'shiny'
}

#' Load data from FASTQ for all samples in a loxcode_experiment object
#'
#' For each sample in the loxcode_experiment, barcodes readout is performed from FASTQ
#' followed by element imputation for the 13-element codes. Loxcodes are then validated, IDs fetched,
#' and distance-from-origin is retrieved.
#'
#' @param x loxcode_experiment object for which to load samples
#' @param full boolean, whether to produce full debugging output (default is FALSE, this uses significantly more memory)
#' @return loxcode_experiment object with sample data loaded
#' @export
setGeneric("load_samples", function(x, ...) {standardGeneric("load_samples")})

setMethod("load_samples", "loxcode_experiment", function(x, full = FALSE, sat = FALSE){

  x@samples <- lapply(names(x@samples), function(z){
    print(z)

    if (shiny_running()) {
      shiny::showNotification(paste("Loading", z, "..."))
    }

    samp_table_sliced <- x@samp_table[match(z, x@samp_table$sample), ]
    files = list.files(x@dir)
    files = sort(files[grepl(".fastq$", files)])
    indices = grepl(x@samples[[z]], files)
    matches = sort(files[indices])
    out <- loxcoder::decode(c(paste0(x@dir, matches[1]), paste0(x@dir, matches[2])),
                            name = z, meta = samp_table_sliced[,!names(samp_table_sliced)%in%c("sample", "prefix", "min_r1_len", "min_r2_len")],
                            min_r1_len = samp_table_sliced$min_r1_len,
                            min_r2_len = samp_table_sliced$min_r2_len, full = full, sat = sat)
    out <- loxcoder::impute(out)
    out <- loxcoder::validate(out)
    out <- loxcoder::makeid(out)
    out <- loxcoder::get_origin_dist(out)
    return(out)
  })
  names(x@samples) <- x@samp_table$sample
  return(x)
})

#' Fetch names of samples
#'
#' The names returned by this function can be used to refer to each individual sample
#' @param x loxcode_experiment object
#' @return names (handles) of samples
#' @export
setGeneric("sampnames", function(x){standardGeneric("sampnames")})

setMethod("sampnames", "loxcode_experiment", function(x){
  return(names(x@samples))
})

#' Set names of samples
#'
#' @export
setGeneric("sampnames<-", function(x, v){ standardGeneric("sampnames<-")})

setMethod("sampnames<-", "loxcode_experiment", function(x, v){
  if(length(v) != length(unique(v))){
    stop("Sample names are not unique")
  }
  names(x@samples) <- v
})

#' Fetch experiment name
#'
#' @param x loxcode_experiment object
#' @return name of experiment
#' @export
setGeneric("name", function(x){ standardGeneric("name")})

setMethod("name", "loxcode_experiment", function(x){
  return(x@name)
})

#' Set experiment name
#'
#' @export
setGeneric("name<-", function(x, v) {standardGeneric("name<-")})

setMethod("name<-", "loxcode_experiment", function(x, v){
  x@name <- v
})

#' Get loxcode_sample object by sample name
#'
#' @param x loxcode_experiment object
#' @param s sample name
#' @return loxcode_sample object corresponding to s
#' @export
setGeneric("sample", function(x, s){standardGeneric("sample")})

setMethod("sample", "loxcode_experiment", function(x, s){
  return(x@samples[[s]])
})

#' Get unvalidated readout data for a given sample
#'
#' Equivalent to performing loxcoder::data() on the sample s
#' @param x loxcode_experiment object
#' @param s sample name
#' @return data.frame containing unvalidated readout data for sample s
#' @export
setGeneric("get_data", function(x, s){standardGeneric("get_data")})

setMethod("get_data", "loxcode_experiment", function(x, s){
  return(loxcoder::data(x@samples[[s]]))
})

#' Get validated readout data for a given sample
#'
#' Equivalent to performing loxcoder::valid() on the sample s
#' @param x loxcode_experiment object
#' @param s sample name
#' @export
setGeneric("get_valid", function(x, s){standardGeneric("get_valid")})

setMethod("get_valid", "loxcode_experiment",function(x, s){
  return(loxcoder::valid(x@samples[[s]]))
})

#' Get sample table
#'
#' @export
setGeneric("samptable", function(x){ standardGeneric("samptable")})

setMethod("samptable", "loxcode_experiment", function(x){
  return(x@samp_table)
})

#' Set sample table
#'
#' @export
setGeneric("samptable<-", function(x, value){ standardGeneric("samptable<-")})

setMethod("samptable<-", "loxcode_experiment", function(x, value){
  x@samp_table <- value
  return(x)
})

#' Create experiment
#'
#' Create a loxcode_experiment from xlsx sample table.
#'
#' Must contain the following columns:
#'
#' * `sample` -- sample name. This is used as the unique identifier for the sample
#'
#' * `prefix` -- sample prefix. suffix_R1 and suffix_R2 are appended to this
#'
#' * `meta` -- sample metadata. This is user specified
#'
#' * `min_r1_len` -- minimum R1 length
#'
#' * `min_r2_len` -- minimum R2 length
#'
#' @param name string, name of the experiment
#' @param s string, path to user-specified Excel file containing experiment samples.
#' @param dir string, path to directory containing fastq files
#' @param load boolean, whether to load samples or not (default is TRUE)
#' @param full boolean, whether to produce full debugging output (default is FALSE, this uses significantly more memory)
#' @param sat boolean, whether to keep saturation info
#' @return loxcode_experiment object
#' @export
load_from_xlsx <- function(name, s, dir, load = TRUE, full = FALSE, sat = FALSE){
  x <- new("loxcode_experiment", name = name, dir = dir)
  x@samp_table <- data.frame(read_excel(s))

  samp_table = x@samp_table

  x@samples = lapply(x@samp_table$prefix, function(z) z)
  names(x@samples) <- x@samp_table$sample
  if(load){
    x <- loxcoder::load_samples(x, full = full, sat = sat);
    x <- loxcoder::setup_count_matrix_codesets(x);
    x <- loxcoder::setup_metadata(x);
    x <- loxcoder::fill_alias(x);
  }
  return(x)
}

# Same as load_from_xlsx but now the parameters
# s, dir, suffix_R1, suffix_R2
# are lists
load_from_xlsx_multi <- function(name, s, dir, suffix_R1, suffix_R2, load = TRUE, full = FALSE, sat = FALSE){
  x <- new("loxcode_experiment", name = name, dir = dir)
  return(x)
}

#' Merge two loxcode_sample objects
#'
#' Barcodes from s1 and s2 are merged. For barcodes present in both samples, read counts are
#' summed. Sample descriptors and other members are concatenated in order.
#' @param s1 sample 1
#' @param s2 sample 2
#' @return loxcode_sample object corresponding to merged sample
#' @export
merge_sample <- function(s1, s2){
  m <- merge(s1@decode@data, s2@decode@data, by = c('code', 'size', 'is_valid', 'id', 'dist_orig'), all = T)
  m$count.x[is.na(m$count.x)] <- 0
  m$count.y[is.na(m$count.y)] <- 0
  m$count <- m$count.x + m$count.y
  m <- m[, !(names(m) %in% c('count.x', 'count.y'))]
  m <- m[order(m$code), ]
  m <- m[, c(ncol(m), 1:(ncol(m)-1))]
  s <- new('loxcode_sample')
  s@decode <- new('decode_output')
  s@decode@data <- m
  # for the rest of the members, we just duplicate
  s@decode@saturation <- list(s1@decode@saturation, s2@decode@saturation)
  s@decode@read_ids <- list(s1@decode@read_ids, s2@decode@read_ids)
  s@meta <- cbind(s1@meta, s2@meta)
  s@name <- paste(s1@name, s2@name, sep = '_')
  s@files <- cbind(s1@files, s2@files)
  s@consensus_filtered_data <- list(s1@consensus_filtered_data,
                                    s2@consensus_filtered_data)
  s@decode_stats <- list(s1@decode_stats,
                         s2@decode_stats)
  return(s)
}

#' Merge samples by label
#'
#' Merge samples present in a loxcode_experiment object according to a specified label column in
#' samp_table. All samples with a common value for the label column are merged together. The resulting
#' loxcode_experiment object contains loxcode_samples named by the corresponding labels in by.
#'
#' @param x loxcode_experiment object
#' @param by name of column in samp_table to merge by
#' @return loxcode_experiment object containing merged samples
#' @export
setGeneric('merge_by', function(x, by){ standardGeneric('merge_by') })

setMethod('merge_by', 'loxcode_experiment', function(x, by){
  vals <- unique(x@samp_table[, by])
  x_merged <- new("loxcode_experiment", name = paste(x@name, 'merge_by', by, sep = '_'),
                  dir = x@dir)
  x_merged@samp_table <- x@samp_table
  x_merged@samples <- lapply(vals, function(z){
    print(z)
    index <- which(x@samp_table[, by] == z)
    if(length(index) > 0){
      out <- x@samples[[index[1]]]
      for(i in 2:length(index)){
        out <- merge_sample(out, x@samples[[index[i]]])
      }
      return(out)
    }else{
      return(NA)
    }
  })
  names(x_merged@samples) <- vals
  return(x_merged)
})

#' Generate count_matrix from individual samples and add standard code_sets
#' @export
setGeneric("setup_count_matrix_codesets", function(x, ...) {standardGeneric("setup_count_matrix_codesets")})

setMethod("setup_count_matrix_codesets", "loxcode_experiment", function(x){
  m=data.frame(code="")
  for(i in x@samp_table$sample){
    m=merge(m,loxcoder::sample(x, i)@decode@data, by="code", all=TRUE)
  }
  row.names(m)=m$code
  m=m[-1,-1]
  mm=m[,grepl("count",names(m))];
  mm[is.na(mm)]=0;
  names(mm)=x@samp_table$sample;

  ms=m[,grepl("size",names(m))];
  mv=m[,grepl("valid",names(m))];
  mo=m[,grepl("orig",names(m))];

  #define some standard code subsets
  all.codes=data.frame(code=row.names(mm), size=rowMeans(ms,na.rm=TRUE), is_valid=rowMeans(mv,na.rm=TRUE), dist_orig=rowMeans(mo,na.rm=TRUE),stringsAsFactors = FALSE)
  valid.codes=subset(all.codes,is_valid==1)
  valid.codes$flp_dist<- loxcoder::min_flip_dist(valid.codes$code, valid.codes$size, valid.codes$is_valid)$d_min
  invalid.codes=subset(all.codes,is_valid==0);

  x@count_matrixes[["all_samples"]]=mm
  x@code_sets[["all_codes"]]=plyr::rbind.fill(valid.codes,invalid.codes);
  x@code_sets[["valid_codes"]]=valid.codes;
  x@code_sets[["invalid_codes"]]=invalid.codes;

  return(x)
})

#' Generate metadata for the loxcode experiment
#' @export
setGeneric("setup_metadata", function(x, ...) {standardGeneric("setup_metadata")})

setMethod("setup_metadata", "loxcode_experiment", function(x){
  for (i in 1:length(x@samples)){
    row = data.frame(sample_name = name(x@samples[[i]]), stringsAsFactors = FALSE)
    row <- merge(row, x@samples[[i]]@meta)
    x@meta = plyr::rbind.fill(x@meta, row)
  }
  return(x)
})

#' Generate alias for the loxcode experiment samples
#' @export
setGeneric("fill_alias", function(x, ...) {sstandardGeneric("fill_alias")})

setMethod("fill_alias", "loxcode_experiment", function(x){
  alias = data.frame()
  for (i in 1:length(x@samples)){
    row = data.frame(sample_name=name(x@samples[[i]]), alias=paste("Sample", i), stringsAsFactors = FALSE)
    alias = plyr::rbind.fill(alias, row)
  }
  x@alias[["all_samples"]] = alias

  return(x)
})

#' selects subset of samples
#' @param count_matrix
#' @export
#setGeneric("select_samples", function(count_matrix="all_samples",select) {standardGeneric("select_samples")})

#setMethod("select_samples", function(count_matrix="all_samples",select){

  #stringr::str_split_fixed(names(D@samples),"_",4)[,3]


#})

#' Creates a new codeset with dataframe of codesets
#'
#' @param x loxcode_experiment object
#' @param c name of existing codeset to choose from
#' @param I indices to include
#' @param n name of the new codeset
#' @return new loxcode_experiment object with a new codeset
#' @export
setGeneric("make_codeset_index", function(x, c, I, n) {standardGeneric("make_codeset_index")})

setMethod("make_codeset_index", "loxcode_experiment", function(x, c, I, n){
  new_experiment <- x
  newset=data.frame()
  for (i in I){
    row = x@code_sets[[c]][i,]
    newset = plyr::rbind.fill(newset, row)
  }
  new_experiment@code_sets[[n]] = newset
  return(new_experiment)
})

#' Deletes a specified codeset from a loxcode experiment
#'
#' @param x loxcode_experiment object
#' @param n name of the codeset to be deleted
#' @return new loxcode_experiment object with a new codeset
#' @export
setGeneric("delete_codeset", function(x, n) {standardGeneric("delete_codeset")})

setMethod("delete_codeset", "loxcode_experiment", function(x, n){
  # always retain all_codes, valid_codes and invalid_codes
  if(n=="all_codes" | n=="valid_codes" | n=="invalid_codes"){
    return(x)
  } else{
    new_experiment <- x
    new_experiment@code_sets[[n]] <- NULL
    return(new_experiment)
  }

})

#' Creates a new count_matrix with dataframe of samples
#'
#' @param x loxcode_experiment object
#' @param c name of existing count_matrix to choose from
#' @param I indices to include
#' @param n name of the new count_matrix
#' @return new loxcode_experiment object with a new count_matrix
#' @export
setGeneric("make_count_matrix", function(x, c, I, n) {standardGeneric("make_count_matrix")})

setMethod("make_count_matrix", "loxcode_experiment", function(x, c, I, n) {
  new_experiment <- x
  new_matrix = x@count_matrixes[[c]]
  new_matrix = new_matrix[I]
  new_experiment@count_matrixes[[n]] = new_matrix

  alias = data.frame()
  for (i in 1:ncol(new_matrix)){
    row = data.frame(sample_name=names(new_matrix)[[i]], alias=paste("Sample", i), stringsAsFactors = FALSE)
    alias = plyr::rbind.fill(alias, row)
  }

  new_experiment@alias[[n]] = alias

  return(new_experiment)
})

#' Creates a new count_matrix with dataframe of samples
#'
#' @param x loxcode_experiment object
#' @param c name of existing count_matrix to be deleted
#' @return updated loxcode_experiment object
#' @export
setGeneric("delete_count_matrix", function(x, c) {standardGeneric("delete_count_matrix")})

setMethod("delete_count_matrix", "loxcode_experiment", function(x, c) {
  if (c=="all_samples"){
    return(x)
  } else {
    new_experiment <- x
    new_experiment@count_matrixes[[c]] <- NULL
    return (new_experiment)
  }
})

#' Renames a sample in a loxcode_experiment
#'
#' @param x loxcode_experiment object
#' @param c count_matrix name
#' @param n new sample name
#' @param o old sample name
#' @return new loxcode_experiment object with updated sample name
#' @export
setGeneric("rename_sample", function(x, c, n, o) {standardGeneric("rename_sample")})

setMethod("rename_sample", "loxcode_experiment", function(x, c, n, o) {
  # count matrixes
  temp <- x@count_matrixes[[c]]
  names(temp)[names(temp) == o] <- n
  x@count_matrixes[[c]] <- temp

  # name
  x@samples[[o]]@name = n

  # samples
  temp <- x@samples[[o]]
  x@samples[[o]] <- NULL
  x@samples[[n]] <- temp

  # meta
  temp <- x@meta
  temp[temp==o] <- n
  x@meta <- temp

  # samp table
  temp <- x@samp_table
  temp[temp==o] <- n
  x@samp_table <- temp

  return(x)
})

#' Changes the alias of a sample
#'
#' @param x loxcode_experiment object
#' @param c current count_matrix
#' @param s sample name
#' @param n new alias
#' @return loxcode_experiment object with changed sample alias
#' @export

setGeneric("new_alias", function(x, c, s, n) {standardGeneric("new_alias")})

setMethod("new_alias", "loxcode_experiment", function(x, c, s, n) {
  alias = x@alias[[c]]
  index = match(s, alias$sample_name)
  alias[index, 2] = n
  x@alias[[c]] = alias
  return(x)
})

#' Collapses the count matrix into samples with the sample metadata
#'
#' @param x loxcode_experiment object
#' @param count_matrix current count_matrix
#' @param collapse column names of metadata on which to collapse
#' @param name name of new count_matrix
#' @param union boolean, True if barcodes in either should be counted
#' @param average boolean, True if counts should be averaged instead of summed
#' @return loxcode_experiment object with new count_matrix
#' @export
setGeneric("collapse", function(x, count_matrix, collapse, name, union=TRUE, average=FALSE) {standardGeneric("collapse")})

setMethod("collapse", "loxcode_experiment", function(x, count_matrix, collapse, name, union=TRUE, average=FALSE) {
  M = x@count_matrixes[[count_matrix]]
  S = get_collapsed_meta(x, count_matrix)
  S = cbind(sample_name = 0, S)
  S$sample_name = row.names(S)
  row.names(S) = seq(1, nrow(S))
  U = as.data.frame(unique(S[,collapse]), stringsAsFactors = FALSE); # get unique combinations
  U = na.omit(U)

  MM = data.frame(row.names=row.names(M), stringsAsFactors = FALSE) #new data.frame to store results

  n=array()
  index = c()
  for (c in collapse){
    index = append(index, match(c, names(S)))
  }

  #loop over all unique combinations
  for(i in 1:nrow(U)) {
    filter=array();
    for(j in 1:nrow(S)) {
      filter[j]=all(S[j,index]==U[i,])
    }
    filter[is.na(filter)]=FALSE
    if(sum(filter==TRUE)==0) next;

    cols = sum(filter==TRUE)

    # bardcode in any sample
    if(cols>1 & union){
      rsums = rowSums(M[,filter])
      if (average){
        MM=cbind(MM,(rsums/cols))
      }
      else {
        MM=cbind(MM, rsums)
      }
    }
    # barcode in all samples
    else if(sum(filter==TRUE)>1 & !union) {
      rsums = rowSums(M[,filter])
      if (average) {
        MM=cbind(MM,(apply(M[,filter]>0,1,all)*rsums/cols))
      }
      else {
        MM=cbind(MM,(apply(M[,filter]>0,1,all)*rsums))
      }
    }
    else {MM=cbind(MM,M[,filter]);} #or if only one sample

    n[i]=paste(U[i,],collapse="__")
  }

  names(MM)=n;
  is.num <- sapply(MM, is.numeric)
  MM[is.num] <- lapply(MM[is.num], round, 2)

  x@count_matrixes[[name]] <- MM

  alias = data.frame()
  for (i in 1:ncol(MM)){
    row = data.frame(sample_name=names(MM)[[i]], alias=paste("Sample", i), stringsAsFactors = FALSE)
    alias = plyr::rbind.fill(alias, row)
  }
  x@alias[[name]] = alias

  return(x)
})

#' Collapses the count matrix into samples with the sample metadata (v2)
#'
#' @param lox loxcode_experiment object
#' @param count_matrix current count_matrix
#' @param collapse column names of metadata on which to collapse
#' @param name name of new count_matrix
#' @param union boolean, True if barcodes in either should be counted
#' @param average boolean, True if counts should be averaged instead of summed
#' @return loxcode_experiment object with new collapsed samples
#' @export
setGeneric("collapse2", function(lox, count_matrix, collapse, name, union=TRUE, average=FALSE) {standardGeneric("collapse2")})

setMethod("collapse2", "loxcode_experiment", function(lox, count_matrix, collapse, name, union=TRUE, average=FALSE) {

  counts = lox@count_matrixes[[count_matrix]]
  meta = subset(lox@meta, sample_name %in% names(counts))
  params = match(collapse, names(meta))

  # group samples based on which share metadata
  track = list()
  for (i in 1:nrow(meta)) {
    sample = meta$sample_name[i]
    metadata = paste0(meta[i, params], collapse = "_")
    if (metadata %in% names(track)) track[[metadata]] = c(track[[metadata]], sample)
    else track[[metadata]] = c(sample)
  }

  # merge samples that share the same meta data
  new_samples = lapply(track, function(samples) merge_sample_list(lox, samples, union, average))
  for (i in 1:length(new_samples)) {
    sample_name = names(track)[i]
    new_samples[[i]]@name = sample_name
    metadata = cbind(data.frame(sample_name = sample_name), new_samples[[i]]@meta)
    lox@meta = rbind.fill(lox@meta, metadata)
  }
  lox@samples = c(lox@samples, new_samples)

  # modify count_matrixes
  codes = lox@code_sets[["all_codes"]]$code
  count_matrix = lox@count_matrixes[["all_samples"]]
  new_count_matrix = data.frame(matrix(nrow = length(codes), ncol = length(new_samples)))
  row.names(new_count_matrix) = codes
  names(new_count_matrix) = names(track)
  for (i in 1:length(track)) {
    if (length(track[[i]]) == 1) new_count_matrix[[i]] = count_matrix[[track[[i]][1]]]
    else new_count_matrix[[i]] = rowSums(count_matrix[ , track[[i]]])
  }
  lox@count_matrixes[[name]] = new_count_matrix

  # fill aliases
  alias = data.frame()
  for (i in 1:ncol(new_count_matrix)){
    row = data.frame(sample_name=names(new_count_matrix)[[i]], alias=paste("Sample", i), stringsAsFactors = FALSE)
    alias = plyr::rbind.fill(alias, row)
  }
  lox@alias[[name]] = alias

  return(lox)
})

#' Merge list of samples
#' Merges a list of samples into one loxcode_sample object
#'
#' @param lox loxcode_experiment object
#' @param samples list of sample names to be merged
#' @param union True if barcodes in either samples should be counted
#' @param average True if counts should be averaged instead of summed
#' @return merged loxcode_sample object
#' @export
setGeneric("merge_sample_list", function(lox, samples, union=TRUE, average=FALSE) {standardGeneric("merge_sample_list")})

setMethod("merge_sample_list", "loxcode_experiment", function(lox, samples, union=TRUE, average=FALSE) {

  # initialize and declare variables
  new = new("loxcode_sample")
  FIXED_PARAMS = c("code", "size", "is_valid", "id", "dist_orig")
  samples_list = lox@samples[samples]

  # fill decode@data slot
  if (length(samples_list) == 1) {
    merged_data = samples_list[[1]]@decode@data
    counts = merged_data$count
    firstreads = merged_data$firstread
  }
  else {
    data_list = c()
    data_list = lapply(samples_list, function(x) c(data_list, x@decode@data))
    merged_data = Reduce(function(x, y) merge(x, y, all = union, by = FIXED_PARAMS), data_list)
    counts = rowSums(merged_data[ ,grepl("count", names(merged_data))], na.rm = TRUE)
    if (average) {counts = counts / length(samples)}
    firstreads = rowSums(merged_data[,grepl("firstread", names(merged_data))], na.rm = TRUE) / length(samples)
  }
  new@decode@data = data.frame(count = counts,
                               firstread = firstreads,
                               code = merged_data$code,
                               size = merged_data$size,
                               is_valid = merged_data$is_valid,
                               id = merged_data$id,
                               dist_orig = merged_data$dist_orig)

  new@decode@saturation = integer(0)
  new@decode@read_ids = list()

  # fill in the metadata slot
  meta_list = c()
  meta_list = lapply(samples_list, function(x) c(meta_list, x@meta))
  meta = Reduce(function(x, y) merge(x, y, all = TRUE), meta_list)
  if (length(samples_list) == 1) new@meta = data.frame(meta)
  else new@meta = meta[lapply(meta, function(x) length(unique(x))) == 1][1, ]

  # files slot
  new@files = list()
  new@files = lapply(samples_list, function(x) list.append(new@files, x@files))

  # decode stats slot
  for (stat in names(samples_list[[1]]@decode_stats)) {
    values = lapply(samples_list, function(x) x@decode_stats[[stat]])
    new@decode_stats[[stat]] = sum(unlist(values))
  }

  return (new)
})

#' Get metadata of a merged experiment (old version)
#'
#' @param x loxcode_experiment object
#' @param s set of merged samples
#' @return a data frame of meta data
#' @export

setGeneric("get_collapsed_meta2", function(x, s) {standardGeneric("get_collapsed_meta2")})

setMethod("get_collapsed_meta2", "loxcode_experiment", function(x, s) {

  counts = x@count_matrixes[[s]]
  sample_names = names(counts)

  # Divide samples into collapsed vs non-collapsed
  non_col_names = intersect(names(x@samples), names(counts))
  col_names = setdiff(sample_names, non_col_names)

  # metadata of collapsed samples
  df1 = data.frame(stringsAsFactors = FALSE)
  if (length(col_names) > 0) {
    col_meta = strsplit(col_names, split = "__")
    for (i in 1:length(col_meta)) {
      row = data.frame("sample_name" = col_names[[i]], t(col_meta[[i]]), stringsAsFactors = FALSE)
      df1 = plyr::rbind.fill(df1, row)
    }

    used = c()
    for (i in 2:ncol(df1)) {
      for (j in 1:ncol(x@meta)) {
        if (!names(x@meta)[[j]] %in% used & all(df1[[i]] %in% x@meta[[j]])) {
          names(df1)[[i]] <- names(x@meta)[[j]]
          used = c(used, names(x@meta)[[j]])
          break
        }
      }
    }
  }

  # metadata of non_collapsed samples
  df2 = subset(x@meta, sample_name %in% non_col_names)

  library(plyr)
  df = rbind.fill(df2, df1)
  row.names(df) = df$sample_name
  df$sample_name = NULL

  return(df)
})

#' Get metadata of a merged experiment
#'
#' @param x loxcode_experiment object
#' @param s set of merged samples
#' @return a data frame of meta data
#' @export

setGeneric("get_collapsed_meta", function(x, s) {standardGeneric("get_collapsed_meta")})

setMethod("get_collapsed_meta", "loxcode_experiment", function(x, s) {

  counts = x@count_matrixes[[s]]
  sample_names = names(counts)

  # Divide samples into collapsed vs non-collapsed
  non_col_names = intersect(names(x@samples), names(counts))
  col_names = setdiff(sample_names, non_col_names)

  # metadata of collapsed samples
  meta = sapply(x@meta, unique)
  df1 = data.frame(matrix(ncol = length(meta), nrow = length(col_names)))
  names(df1) = names(meta)
  df1$sample_name = col_names
  if (length(col_names)) {
    for (i in 1:length(col_names)) {
      components = unlist(strsplit(col_names[i], split = "__"))
      if (length(components)==1 & grepl("NA", components[[1]])) {
        # no metadata to insert
        break
      }
      else {
        used_cols = c()
        for (metadata in components) {
          possible_cols = names(meta)[sapply(meta, function(x) metadata %in% x)]
          possible_cols = setdiff(possible_cols, intersect(possible_cols, used_cols))
          df1[i, possible_cols[[1]]] = metadata
          used_cols = c(used_cols, possible_cols[1])
        }
      }
    }
  }

  # metadata of non_collapsed samples
  df2 = subset(x@meta, sample_name %in% non_col_names)

  library(plyr)
  df = rbind.fill(df2, df1)
  df = df[!is.na(df$sample_name), ]
  row.names(df) = df$sample_name
  df$sample_name = NULL

  return(df)
})

#' Create new filtered code set according to defined parameters
#'
#' @param x loxcode_experiment object
#' @param s independent sample set
#' @param c code set
#' @param t tolerance threshold for repeated proportions
#' @param m maximum repeats tolerated
#' @param n new code set name
#' @return new loxcode_experiment object
#' @export
setGeneric("make_filtered_codeset", function(x, s, c, t, m, n) {standardGeneric("make_filtered_codeset")})

setMethod("make_filtered_codeset", "loxcode_experiment", function(x, s, c, t, m, n) {
  codeset = x@code_sets[[c]]
  YY = filtered_codes_table(x, s, c, t, m)
  x@code_sets[[n]] = subset(codeset,paste(codeset$size,codeset$dist_orig)%in%paste(YY$size,YY$dist_orig))
  return (x)
})

#' rename code set
#'
#' @param x loxcode_experiment object
#' @param c code set
#' @param n new code set name
#' @return new loxcode_experiment object
#' @export
setGeneric("rename_codeset", function(x, c, n) {standardGeneric("rename_codeset")})

setMethod("rename_codeset", "loxcode_experiment", function(x, c, n) {
  if (c %in% c("all_codes", "invalid_codes", "valid_codes")) {
    return(x)
  }

  temp = x@code_sets[[c]]
  x@code_sets[[c]] <- NULL
  x@code_sets[[n]] = temp
  return(x)
})


#' rename sample set
#'
#' @param x loxcode_experiment object
#' @param s sample set
#' @param n new code set name
#' @return new loxcode_experiment object
#' @export
setGeneric("rename_sampleset", function(x, s, n) {standardGeneric("rename_sampleset")})

setMethod("rename_sampleset", "loxcode_experiment", function(x, s, n) {
  if (s %in% c("all_samples")) {
    return(x)
  }

  temp = x@count_matrixes[[s]]
  x@count_matrixes[[s]] <- NULL
  x@count_matrixes[[n]] = temp
  temp = x@alias[[s]]
  x@alias[[s]] <- NULL
  x@alias[[n]] = temp
  return(x)
})

#' generate sample aliases
#'
#' @param lox loxcode_experiment object
#' @param s sample set
#' @param meta columns of meta data on which to collapse
#' @return loxcode_experiment object with aliases generated
#' @export

setGeneric("generate_alias", function(lox, s="all_samples", meta) {standardGeneric("generate_alias")})

setMethod("generate_alias", "loxcode_experiment", function(lox, s="all_samples", meta) {
  x = lox
  aliases = x@alias[[s]]
  metadata = get_collapsed_meta(lox, s)
  metadata$sample_name = row.names(metadata)
  row.names(metadata) = seq(1, nrow(metadata))

  for (i in 1:nrow(aliases)) {
    sample = aliases$sample_name[[i]]
    index = match(sample, metadata$sample_name)
    row = metadata[index,]
    alias = paste(row[meta], collapse="_")
    aliases$alias[[i]] = alias
  }

  counts = list()
  length(counts) = length(unique(aliases$alias))
  names(counts) = unique(aliases$alias)

  for (i in 1:nrow(aliases)) {
    alias = aliases$alias[[i]]
    if (is.numeric(counts[[alias]])) {
      counts[[alias]] = counts[[alias]] + 1
    } else {
      counts[[alias]] = 1
    }
    alias = paste(alias, as.character(counts[[alias]]), sep="_")
    aliases$alias[[i]] = alias
  }

  x@alias[[s]] = aliases

  return(x)
})

#' Merge two experiments
#'
#' @param lox1 loxcode_experiment 1
#' @param lox2 loxcode_experiment 2
#' @param name experiment name
#' @return new merged loxcode_experiment
#' @export

setGeneric("merge_experiments", function(lox1, lox2, name=NULL) {standardGeneric("merge_experiments")})

setMethod("merge_experiments", "loxcode_experiment", function(lox1, lox2, name=NULL) {
  new = new("loxcode_experiment")
  duplicates = c()
  rename1 = c()
  rename2 = c()

  # name slot
  if (is.null(name)) {
    if (lox1@name == lox2@name) {
      lox1@name = paste0(lox1@name, "_1")
      lox2@name = paste0(lox2@name, "_2")
    }
    new@name = paste(lox1@name, lox2@name, collapse="+")
  } else {
    new@name = name
  }

  # directory slot
  new@dir = c(lox1@dir, lox2@dir)

  # samples slot
  for (i in 1:length(lox1@samples)) {
    lox1@samples[[i]]@meta$experiment = lox1@name
  }
  for (i in 1:length(lox2@samples)) {
    lox2@samples[[i]]@meta$experiment = lox2@name

    # find duplicates
    if (lox2@samples[[i]]@name %in% names(lox1@samples)) {
      duplicates = c(duplicates, lox2@samples[[i]]@name)
      rename1 = c(rename1, paste0(lox2@samples[[i]]@name, "_1"))
      rename2 = c(rename2, paste0(lox2@samples[[i]]@name, "_2"))
    }
  }

  names(rename1) <- duplicates
  names(rename2) <- duplicates
  library(plyr)
  names(lox1@samples) = revalue(names(lox1@samples), rename1, warn_missing = FALSE)
  names(lox2@samples) = revalue(names(lox2@samples), rename2, warn_missing = FALSE)
  new@samples = c(lox1@samples, lox2@samples)

  for (i in 1:length(new@samples)) {
    new@samples[[i]]@name = names(new@samples)[i]
  }

  # samp_table slot
  library(plyr)
  lox1@samp_table$experiment = lox1@name
  lox2@samp_table$experiment = lox2@name
  lox1@samp_table$sample = revalue(lox1@samp_table$sample, rename1, warn_missing = FALSE)
  lox2@samp_table$sample = revalue(lox2@samp_table$sample, rename2, warn_missing = FALSE)
  new@samp_table = plyr::rbind.fill(lox1@samp_table, lox2@samp_table)

  # count_matrixes slot
  for (i in 1:length(lox1@count_matrixes)) {
    names(lox1@count_matrixes[[i]]) = revalue(names(lox1@count_matrixes[[i]]), rename1, warn_missing = FALSE)
  }
  for (i in 1:length(lox2@count_matrixes)) {
    names(lox2@count_matrixes[[i]]) = revalue(names(lox2@count_matrixes[[i]]), rename2, warn_missing = FALSE)
  }

  all1 = lox1@count_matrixes[["all_samples"]]
  all2 = lox2@count_matrixes[["all_samples"]]

  all_samples = merge(all1, all2, by = 0, all=TRUE)
  all_samples[is.na(all_samples)] <- 0
  row.names(all_samples) = all_samples$Row.names
  all_samples$Row.names = NULL

  counts1 = lox1@count_matrixes
  counts2 = lox2@count_matrixes
  counts1[["all_samples"]] <- NULL
  counts2[["all_samples"]] <- NULL
  counts1[[paste0("all_samples_", lox1@name)]] = all1
  counts2[[paste0("all_samples_", lox2@name)]] = all2

  new@count_matrixes[["all_samples"]] = all_samples
  for (c in names(counts1)) {
    new@count_matrixes[[c]] = counts1[[c]]
  }
  for (c in names(counts2)) {
    new@count_matrixes[[c]] = counts2[[c]]
  }

  # code_sets slot
  codes1 = lox1@code_sets
  codes2 = lox2@code_sets
  new@code_sets[["all_codes"]] = unique(rbind.fill(codes1[["all_codes"]], codes2[["all_codes"]]))
  new@code_sets[[paste0("all_codes_", lox1@name)]] = codes1[["all_codes"]]
  new@code_sets[[paste0("all_codes_", lox2@name)]] = codes2[["all_codes"]]

  new@code_sets[["valid_codes"]] = unique(rbind.fill(codes1[["valid_codes"]], codes2[["valid_codes"]]))
  new@code_sets[[paste0("valid_codes_", lox1@name)]] = codes1[["valid_codes"]]
  new@code_sets[[paste0("valid_codes_", lox2@name)]] = codes2[["valid_codes"]]

  new@code_sets[["invalid_codes"]] = unique(rbind.fill(codes1[["invalid_codes"]], codes2[["invalid_codes"]]))
  new@code_sets[[paste0("invalid_codes_", lox1@name)]] = codes1[["invalid_codes"]]
  new@code_sets[[paste0("invalid_codes_", lox2@name)]] = codes2[["invalid_codes"]]

  others1 = codes1[!names(codes1) %in% c("all_codes", "valid_codes", "invalid_codes")]
  others2 = codes2[!names(codes2) %in% c("all_codes", "valid_codes", "invalid_codes")]

  for (c in names(others1)) {
    new@code_sets[[c]] = others1[[c]]
  }
  for (c in names(others2)) {
    new@code_sets[[c]] = others2[[c]]
  }

  # meta slot
  lox1@meta$sample_name = revalue(lox1@meta$sample_name, rename1, warn_missing = FALSE)
  lox2@meta$sample_name = revalue(lox2@meta$sample_name, rename2, warn_missing = FALSE)
  lox1@meta$experiment = lox1@name
  lox2@meta$experiment = lox2@name
  new@meta = plyr::rbind.fill(lox1@meta, lox2@meta)

  # alias slot
  for (i in 1:length(lox1@alias)) {
    lox1@alias[[i]]$sample_name = revalue(lox1@alias[[i]]$sample_name, rename1, warn_missing = FALSE)
  }
  for (i in 1:length(lox2@alias)) {
    lox2@alias[[i]]$sample_name = revalue(lox2@alias[[i]]$sample_name, rename2, warn_missing = FALSE)
  }

  all_aliases = data.frame(sample_name = names(all_samples)[!names(all_samples) == "Row.names"],
                           stringsAsFactors = FALSE)
  all_aliases$alias = paste0("Sample ", seq(1, nrow(all_aliases)))

  new@alias[["all_samples"]] = all_aliases

  aliases1 = lox1@alias
  aliases2 = lox2@alias

  all1 = aliases1[["all_samples"]]
  all2 = aliases2[["all_samples"]]

  aliases1[["all_samples"]] <- NULL
  aliases2[["all_samples"]] <- NULL

  aliases1[[paste0("all_samples_", lox1@name)]] = all1
  aliases2[[paste0("all_samples_", lox2@name)]] = all2

  for (c in names(aliases1)) {
    new@alias[[c]] = aliases1[[c]]
  }
  for (c in names(aliases2)) {
    new@alias[[c]] = aliases2[[c]]
  }

  return(new)
})

#' Merge a vector of experiments
#'
#' @param experiments vector of loxcode_experiment objects
#' @param name name of new loxcode_experiment object
#' @return loxcode_experiment object
#'
#' @export

merge_experiments2 <- function(experiments, name) {
  new = new("loxcode_experiment")
  new@name = name
  for (exp in experiments) {
    new@dir = c(new@dir, exp@dir)
  }

  return (new)
}

#' Collapse selected samples
#'
#' @param lox loxcode_experiment object
#' @param s sample set/ count_matrix
#' @param c code set
#' @param i indices of samples to collapse
#' @param n new sample name
#' @param union boolean: use union or intersection of samples
#' @param average boolean: use average of sum of counts
#' @export

setGeneric("collapse_selection", function(lox, s="all_samples", c="all_codes", i, n=NULL, union=TRUE, average=FALSE) {standardGeneric("collapse_selection")})

setMethod("collapse_selection", "loxcode_experiment", function(lox, s="all_samples", c="all_codes", i, n=NULL, union=TRUE, average=FALSE) {
  counts = lox@count_matrixes[[s]]
  counts = subset(counts, row.names(counts) %in% lox@code_sets[[c]]$code)
  meta = get_collapsed_meta(lox, s)[i, ]

  sample_name = c()
  for (k in 1:ncol(meta)) {
    if (sum(is.na(meta[, k])) == 0 & dim(table(meta[, k])) == 1) {
      sample_name = c(sample_name, meta[1, k])
    }
  }
  sample_name = paste0(sample_name, collapse = "__")
  if (sample_name == "") {
    sample_name = paste0("NA", sum(grepl("NA", names(counts))))
  }

  cols = data.frame(row.names = seq(1:nrow(counts)))
  cols[[sample_name]] = NA
  for(k in 1:nrow(counts)) {
    if (union==TRUE & average==TRUE) {
      cols[k, 1] = sum(counts[k, i]) / length(i)
    }
    if (union==TRUE & average==FALSE) {
      cols[k, 1] = sum(counts[k, i])
    }
    if (union==FALSE & average==TRUE) {
      cols[k, 1] = sum(counts[k, i]) * as.numeric(sum(counts[k, i]==0)==0) / length(i)
    }
    if (union==FALSE & average==FALSE) {
      cols[k, 1] = sum(counts[k, i]) * as.numeric(sum(counts[k, i]==0)==0)
    }
  }

  counts = cbind(counts, cols)
  counts[, i] = NULL
  lox@count_matrixes[[s]] = counts

  # update the aliases
  aliases = data.frame("sample_name" = names(counts), "alias" = NA)
  aliases$alias = paste0("Sample ", seq(1:nrow(aliases)))
  lox@alias[[s]] = aliases

  return (lox)
})
jngwehi/loxcodeR documentation built on March 17, 2020, 5:32 p.m.