R/plots.R

Defines functions barcode_union get_barcode_stats_rep get_comparison_table get_comparison_table2 strCap capitalize

#' Plots distribution of loxcode sizes found in sample
#'
#' @param lox loxcode_experiment object
#' @param sample loxcode_sample object
#' @param count_matrix count_matrix
#' @param code_set code_set
#' @param labels sample name or alias
#' @export
setGeneric("size_plot", function(lox, sample, count_matrix="all_samples", code_set="all_codes", labels="alias") {standardGeneric("size_plot")})

setMethod("size_plot", "loxcode_experiment", function(lox, sample, count_matrix="all_samples", code_set="all_codes", labels="alias"){
  counts = lox@count_matrixes[[count_matrix]]
  counts$codes = row.names(counts)
  counts = subset(counts, counts[, sample] > 0)
  codes = lox@code_sets[[code_set]]
  data = subset(codes, code %in% counts$codes)

  title = switch(labels, "sample" = sample, "alias" = get_alias(lox, count_matrix, sample))

  g = ggplot(data) + geom_bar(aes(as.factor(size), fill = as.factor(is_valid)), position = "stack", stat = "count") +
    xlab("size") + ylab("diversity") +
    ggtitle(title)

  return(g)
})

#' Plots distribution of dist_orig found in sample
#'
#' @param lox loxcode_experiment object
#' @param sample loxcode_sample object
#' @param count_matrix count_matrix
#' @param code_set code_set
#' @param labels sample name or alias
#' @export
setGeneric("dist_orig_plot", function(lox, sample, count_matrix="all_samples", code_set="all_codes", labels="alias") {standardGeneric("dist_orig_plot")})

setMethod("dist_orig_plot", "loxcode_experiment", function(lox, sample, count_matrix="all_samples", code_set="all_codes", labels="alias"){
  # x = loxcode_sample
  # u <- valid(x)
  # #u <- u[u$size == size, ]
  # fill_scale <- scale_fill_manual(breaks = 0:15, values = rep('blue', 16)) # use twice since gradient is hard to see otherwise
  # g <- ggplot(data = u) + facet_wrap(~size) + geom_bar(aes(x = dist_orig, fill = factor(dist_orig)), show.legend = F) + scale_x_continuous(breaks = 0:10, limits = c(0, 10)) +
  #   xlab("Distance from origin") + ylab("Diversity") + ggtitle(loxcoder::name(x))
  counts = lox@count_matrixes[[count_matrix]]
  counts$codes = row.names(counts)
  counts = subset(counts, counts[, sample] > 0)
  codes = lox@code_sets[[code_set]]
  codes[is.na(codes)] = 0
  data = subset(codes, code %in% counts$codes)

  title = switch(labels, "sample" = sample, "alias" = get_alias(lox, count_matrix, sample))

  g = ggplot(data) + facet_wrap(~size) + geom_bar(aes(dist_orig, fill = factor(dist_orig)), show.legend = FALSE) +
    scale_x_continuous(breaks = 0:10, limits = c(0, 10)) +
    xlab("Distance from origin") + ylab("diversity") +
    ggtitle(title)

  return(g)
})

#' Produce rank-count plot
#'
#' In the resulting plot, barcodes of specified size are ranked by read count. log10(count) is then plotted
#' against barcode rank in black. In addition, dist_orig for each barcode is shown in red with the same horizontal axis.
#'
#' @param x loxcode_sample object
#' @param size loxcode size to consider
#' @param ymax max y-value for counts
#' @param dmax max d-value for dist_orig
#' @export
setGeneric("rank_count_plot", function(x, size, ymax, dmax) {standardGeneric("rank_count_plot")})

setMethod("rank_count_plot", "loxcode_sample", function(x, size, ymax, dmax){
  u <- valid(x)
  u <- u[u$size == size, ]
  u <- u[order(u$count, decreasing = T), ]
  # u$count <- u$count/sum(u$count) # use frequency instead of raw count
  ggplot(data = u) + geom_point(aes(x = 1:nrow(u), y = log10(count))) +
    geom_point(aes(x = 1:nrow(u), y = dist_orig*ymax/dmax), color = 'red') +
    scale_x_log10(breaks = 1:10, labels = u$code[1:10]) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
    xlab('code') + ylab('log10(count)') +
    ggtitle(sprintf('%s; size = %d', loxcoder::name(x), size)) +
    scale_y_continuous(limits = c(0, ymax), sec.axis = sec_axis(~.*(dmax/ymax), name = 'distance'))
})

#' Produce pair comparison scatterplot
#'
#' For each barcode in x1, x2, read counts in each sample are plotted on a log-scale, after adjusting sample x2 to match the
#' total read count (for valid barcodes) in sample x1. Barcodes present in one sample and not the other are shown
#' horizontally and vertically (below 0) using beeswarm plots
#'
#' @param lox loxcode_experiment object
#' @param set sample set
#' @param codeset code set
#' @param x1 loxcode_sample object for sample 1
#' @param x2 loxcode_sample object for sample 2
#' @param range c(lower, upper), specifying the range of dist_orig values to consider. Both bounds are inclusive.
#'
#' @export
setGeneric("pair_comparison_plot", function(lox, set, codeset="all_codes", x1, x2, range, ...) {standardGeneric("pair_comparison_plot")})

setMethod("pair_comparison_plot", "loxcode_experiment", function(lox, set, codeset="all_codes", x1, x2, range, plot="size", labels = "alias"){
  u <- get_comparison_table(x1, x2, plot)
  u <- u[u$code %in% lox@code_sets[[codeset]]$code,]

  name1 = switch(labels, "alias" = get_alias(lox, set, x1@name), "sample" = x1@name)
  name2 = switch(labels, "alias" = get_alias(lox, set, x2@name), "sample" = x2@name)

  nonzero_mask <- (u$rep1_count > 0 & u$rep2_count > 0)
  rep1_zero_mask <- (u$rep1_count == 0)
  rep2_zero_mask <- (u$rep2_count == 0)

  theme_update(plot.title = element_text(hjust = 0.5))

  if (plot=="size") {
    g <- ggplot() +
      geom_point(aes(y = log10(1 + u$rep1_count[nonzero_mask]) , x = log10(1 + u$rep2_count[nonzero_mask]),
                     color = as.factor(u$size[nonzero_mask])), alpha = 1)  +
      geom_abline(alpha=0.75) + ylab(paste('log10(reads in ', name1, ')')) + xlab(paste('log10(reads in ', name2, ')')) +
      ggtitle(paste(name1, 'vs', name2)) + scale_color_discrete(name="Size")

    if (name1!=name2) {
      g <- g +
        geom_quasirandom(aes(y = rep(-0.2, sum(rep1_zero_mask)), x = log10(1 + u$rep2_count[rep1_zero_mask]), color = as.factor(u$size[rep1_zero_mask])), groupOnX = F, width = 0.2, alpha = 1) +
        geom_quasirandom(aes(y = log10(1 + u$rep1_count[rep2_zero_mask]), x = rep(-0.2, sum(rep2_zero_mask)), color = as.factor(u$size[rep2_zero_mask])), groupOnX = T, width = 0.2, alpha = 1)
    }
  }

  if (plot=="complexity") {
    g <- ggplot() +
      geom_point(aes(y = log10(1 + u$rep1_count[nonzero_mask]) , x = log10(1 + u$rep2_count[nonzero_mask]),
                     color = as.factor(u$dist_orig[nonzero_mask])), alpha = 1) +
      geom_abline(alpha=0.75) + ylab(paste('log10(reads in ', name1, ')')) + xlab(paste('log10(reads in ', name2, ')')) +
      ggtitle(paste(name1, 'vs', name2)) + scale_color_discrete(name="Complexity")

    if (name1!=name2){
      g <- g +
        geom_quasirandom(aes(y = rep(-0.2, sum(rep1_zero_mask)), x = log10(1 + u$rep2_count[rep1_zero_mask]), color = as.factor(u$size[rep1_zero_mask])), groupOnX = F, width = 0.2, alpha = 1) +
        geom_quasirandom(aes(y = log10(1 + u$rep1_count[rep2_zero_mask]), x = rep(-0.2, sum(rep2_zero_mask)), color = as.factor(u$size[rep2_zero_mask])), groupOnX = T, width = 0.2, alpha = 1)
    }
  }

  return(g)
})

#' @export
barcode_union <- function(rep1, rep2, type="size", range){
  if (type=="size") {
    x = unique(c(filter(rep1@decode@data, size >= range[1], size <= range[2])$code,
                filter(rep2@decode@data, size >= range[1], size <= range[2])$code))
  } else if (type=="complexity") {
    x = unique(c(filter(rep1@decode@data, dist_orig >= range[1], dist_orig <= range[2])$code,
                 filter(rep2@decode@data, dist_orig >= range[1], dist_orig <= range[2])$code))
  }

  return (x)
}

#' @export
get_barcode_stats_rep <- function(union_bc, rep){
  index <- match(union_bc, rep@decode@data$code)
  # u <- loxcoder::valid(rep)[index, ]
  u <- rep@decode@data[index, ]
  u$count[is.na(u$count)] <- 0
  u$code <- union_bc
  return(u)
}

#' @export
get_comparison_table <- function(rep1, rep2, type="size", range=c(1,13)){
  bc_union <- barcode_union(rep1, rep2, type, range)
  u1 <- get_barcode_stats_rep(bc_union, rep1)
  u2 <- get_barcode_stats_rep(bc_union, rep2)
  u1$size <- ifelse(is.na(u1$size), u2$size, u1$size)
  u2$size <- u1$size
  u1$dist_orig <- ifelse(is.na(u1$dist_orig), u2$dist_orig, u1$dist_orig)
  u2$dist_orig <- u1$dist_orig
  # scale by total number of reads
  u1$count <- u1$count
  u2$count <- u2$count*(sum(loxcoder::data(rep1)$count)/sum(loxcoder::data(rep2)$count))

  u <- data.frame(code = bc_union,
                  size = u1$size,
                  dist_orig = u1$dist_orig,
                  rep1_count = u1$count,
                  rep2_count = u2$count)

  nonzero_mask <- (u$rep1_count > 0 & u$rep2_count > 0)
  rep1_zero_mask <- (u$rep1_count == 0)
  rep2_zero_mask <- (u$rep2_count == 0)

  u$log_rep1[nonzero_mask] = u$rep1_count[nonzero_mask]
  u$log_rep2[nonzero_mask] = u$rep2_count[nonzero_mask]

  return(u)
}

#' Pair Comparison Plot 2
#'
#' @param lox Loxcode_experiment object
#' @param sampleset Sample set
#' @param codeset Code set
#' @param s1 First Loxcode_sample
#' @param s2 Second Loxcode_sample
#' @param colorBy Color points by size/dist_orig/firstread
#' @param labels Sample name or alias as labels
#'
#' @export
setGeneric("pair_comparison_plot2", function(lox, sampleset="all_samples", codeset="all_codes", s1, s2, colorBy="size", labels="alias", sizeRange=NULL, dist_origRange=NULL, firstreadRange=NULL) {standardGeneric("pair_comparison_plot2")})

setMethod("pair_comparison_plot2", "loxcode_experiment", function(lox, sampleset="all_samples", codeset="all_codes", s1, s2, colorBy="size", labels="alias", sizeRange=NULL, dist_origRange=NULL, firstreadRange=NULL) {

  # Labels
  name1 = switch(labels, "alias" = get_alias(lox, sampleset, s1@name), "sample" = s1@name)
  name2 = switch(labels, "alias" = get_alias(lox, sampleset, s2@name), "sample" = s2@name)

  # Comparison Table
  comp_table = get_comparison_table2(s1, s2)

  # Subset Comparison Table
  comp_table <- comp_table[comp_table$code %in% lox@code_sets[[codeset]]$code, ]
  if (!is.null(sizeRange)) {
    comp_table <- subset(comp_table, size >= sizeRange[1] & size <= sizeRange[2])
  }
  if (!is.null(dist_origRange)) {
    comp_table <- subset(comp_table, dist_orig >= dist_origRange[1] & dist_orig <= dist_origRange[2])
  }
  if (!is.null(firstreadRange)) {
    comp_table <- subset(comp_table, firstread >= firstreadRange[1] & firstread <= firstreadRange[2])
  }

  nonzero_mask <- (comp_table$s1_count > 0 & comp_table$s2_count > 0)
  s1_zero_mask <- (comp_table$s1_count == 0)
  s2_zero_mask <- (comp_table$s2_count == 0)

  theme_update(plot.title = element_text(hjust = 0.5))
  color = switch(colorBy,
                 "size" = as.factor(comp_table$size[nonzero_mask]),
                 "dist_orig" = as.factor(comp_table$dist_orig[nonzero_mask]),
                 "firstread" = log10(comp_table$firstread[nonzero_mask]))

  # Main plot
  g = ggplot() +
    geom_point(aes(y = comp_table$log_s1[nonzero_mask] , x = comp_table$log_s2[nonzero_mask], color = color)) +
    geom_abline(alpha = 0.75) +
    ylab(paste0('log10(1 + reads in ', name1, ')')) +
    xlab(paste0('log10(1 + reads in ', name2, ')')) +
    ggtitle(paste(name1, 'vs', name2))

  if (colorBy == "firstread") {
    g = g + scale_color_continuous(name = colorBy)
  } else {
    g = g + scale_color_discrete(name = colorBy)
  }

  # Barcodes only in one sample
  if (name1 != name2) {
    color1 = switch(colorBy,
                   "size" = as.factor(comp_table$size[s1_zero_mask]),
                   "dist_orig" = as.factor(comp_table$dist_orig[s1_zero_mask]),
                   "firstread" = log10(comp_table$firstread[s1_zero_mask]))
    color2 = switch(colorBy,
                    "size" = as.factor(comp_table$size[s2_zero_mask]),
                    "dist_orig" = as.factor(comp_table$dist_orig[s2_zero_mask]),
                    "firstread" = log10(comp_table$firstread[s2_zero_mask]))

    g = g +
      geom_quasirandom(aes(y = rep(-0.2, sum(s1_zero_mask)), x = log10(1 + comp_table$s2_count[s1_zero_mask]), color = color1), groupOnX = FALSE, width = 0.2) +
      geom_quasirandom(aes(y = log10(1 + comp_table$s1_count[s2_zero_mask]), x = rep(-0.2, sum(s2_zero_mask)), color = color2), groupOnX = TRUE, width = 0.2)
  }

  return(g)
})

#' Comparison table (v3)
#'
#' Gets the comparison table used to produce the pair_comparison plot
#' @param lox Loxcode_experiment object
#' @param sampleset sample set
#' @param codeset code set
#' @param s1 name of sample 1
#' @param s2 name of sample 2
#' @param colorBy Color points by size/dist_orig/firstread
#' @param labels Sample name or alias as labels
#' @param sizeRange restrict the range of code sizes displayed
#' @param dist_origRange restrict the range of code complexities displayed
#' @param firstreadRange restrict the range of firstreads displayed
#' @return pair comparison plot
#'
#' @export
setGeneric("pair_comparison_plot3", function(lox, sampleset="all_samples", codeset="all_codes", s1, s2, colorBy="size", labels="alias", sizeRange=NULL, dist_origRange=NULL, firstreadRange=NULL) {standardGeneric("pair_comparison_plot3")} )

setMethod("pair_comparison_plot3", "loxcode_experiment", function(lox, sampleset="all_samples", codeset="all_codes", s1, s2, colorBy="size", labels="alias", sizeRange=NULL, dist_origRange=NULL, firstreadRange=NULL) {

  # Labels
  name1 = switch(labels, "alias" = get_alias(lox, sampleset, s1@name), "sample" = s1@name)
  name2 = switch(labels, "alias" = get_alias(lox, sampleset, s2@name), "sample" = s2@name)

  # Comparison Table
  comp_table = get_comparison_table3(lox, sampleset, )

  # Subset Comparison Table
  comp_table <- comp_table[comp_table$code %in% lox@code_sets[[codeset]]$code, ]
  if (!is.null(sizeRange)) {
    comp_table <- subset(comp_table, size >= sizeRange[1] & size <= sizeRange[2])
  }
  if (!is.null(dist_origRange)) {
    comp_table <- subset(comp_table, dist_orig >= dist_origRange[1] & dist_orig <= dist_origRange[2])
  }
  if (!is.null(firstreadRange)) {
    comp_table <- subset(comp_table, firstread >= firstreadRange[1] & firstread <= firstreadRange[2])
  }

  nonzero_mask <- (comp_table$s1_count > 0 & comp_table$s2_count > 0)
  s1_zero_mask <- (comp_table$s1_count == 0)
  s2_zero_mask <- (comp_table$s2_count == 0)

  theme_update(plot.title = element_text(hjust = 0.5))
  color = switch(colorBy,
                 "size" = as.factor(comp_table$size[nonzero_mask]),
                 "dist_orig" = as.factor(comp_table$dist_orig[nonzero_mask]),
                 "firstread" = log10(comp_table$firstread[nonzero_mask]))

  # Main plot
  g = ggplot() +
    geom_point(aes(y = comp_table$log_s1[nonzero_mask] , x = comp_table$log_s2[nonzero_mask], color = color)) +
    geom_abline(alpha = 0.75) +
    ylab(paste0('log10(1 + reads in ', name1, ')')) +
    xlab(paste0('log10(1 + reads in ', name2, ')')) +
    ggtitle(paste(name1, 'vs', name2))

  if (colorBy == "firstread") {
    g = g + scale_color_continuous(name = colorBy)
  } else {
    g = g + scale_color_discrete(name = colorBy)
  }

  # Barcodes only in one sample
  if (name1 != name2) {
    color1 = switch(colorBy,
                    "size" = as.factor(comp_table$size[s1_zero_mask]),
                    "dist_orig" = as.factor(comp_table$dist_orig[s1_zero_mask]),
                    "firstread" = log10(comp_table$firstread[s1_zero_mask]))
    color2 = switch(colorBy,
                    "size" = as.factor(comp_table$size[s2_zero_mask]),
                    "dist_orig" = as.factor(comp_table$dist_orig[s2_zero_mask]),
                    "firstread" = log10(comp_table$firstread[s2_zero_mask]))

    g = g +
      geom_quasirandom(aes(y = rep(-0.2, sum(s1_zero_mask)), x = log10(1 + comp_table$s2_count[s1_zero_mask]), color = color1), groupOnX = FALSE, width = 0.2) +
      geom_quasirandom(aes(y = log10(1 + comp_table$s1_count[s2_zero_mask]), x = rep(-0.2, sum(s2_zero_mask)), color = color2), groupOnX = TRUE, width = 0.2)
  }

  return(g)
})

#' @export
get_comparison_table2 <- function(s1, s2) {

  # table of barcodes in both samples
  counts = merge(s1@decode@data, s2@decode@data, by="code", all=TRUE)
  counts$dist_orig = rowMeans(counts[ , grepl("dist_orig", names(counts))], na.rm = TRUE)
  counts$size = rowMeans(counts[ , grepl("size", names(counts))], na.rm = TRUE)
  counts$firstread = rowMeans(counts[ , grepl("firstread", names(counts))], na.rm = TRUE)
  counts$count.x[is.na(counts$count.x)] <- 0
  counts$count.y[is.na(counts$count.y)] <- 0

  table = data.frame(code = counts$code,
                     size = counts$size,
                     dist_orig = counts$dist_orig,
                     firstread = counts$firstread,
                     s1_count = counts$count.x,
                     s2_count = counts$count.y,
                     stringsAsFactors = FALSE)

  # scale by total number of reads
  table$s2_count <- table$s2_count * (sum(loxcoder::data(s1)$count) / sum(loxcoder::data(s2)$count))

  nonzero_mask <- (table$s1_count > 0 & table$s2_count > 0)
  s1_zero_mask <- (table$s1_count == 0)
  s2_zero_mask <- (table$s2_count == 0)

  # log of counts
  table$log_s1[nonzero_mask] = log10(1 + table$s1_count[nonzero_mask])
  table$log_s2[nonzero_mask] = log10(1 + table$s2_count[nonzero_mask])

  return(table)
}

#' Show recombination distance distribution by size as beeswarm plot
#'
#' Produces a beeswarm plot where for each cassette size, individual distinct barcodes are shown as
#' points. Size and color of points correspond to the read count of each barcode.
#' @param x loxcode_sample object
#' @param count_threshold counts threshold, barcodes with a count number exceeding this threshold are ignored. This is in order
#' to avoid having very large points resulting from barcodes with disproportionate read counts.
#' @export
setGeneric("dist_count_beeswarm_plot", function(x, count_threshold) {standardGeneric("dist_count_beeswarm_plot")})

setMethod("dist_count_beeswarm_plot", "loxcode_sample", function(x, count_threshold){
  loxcoder::valid(x) %>% filter(count < count_threshold) -> y
  g <- ggplot(data = y) + geom_quasirandom(width = 0.9, groupOnX = F, aes(x = dist_orig, y = size, size = count, color = count), alpha = 0.2) +
    scale_size_area("num_reads") +
    scale_x_continuous("distance from origin", breaks = 0:15) +
    scale_y_continuous("size", breaks = c(3, 5, 7, 9, 13)) +
    scale_color_gradient(low = 'blue', high = 'red') +
    ggtitle(loxcoder::name(x))
  return(g)
})

#' Generate heatmap from count_matrix (TW/31/10/2019)
#' @param loxcode_experiment loxcode_experiment object
#' @export
setGeneric("heatmap_plot", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",style="ggplot", labels="sample", clustering="none",agglomeration="complete",min_reads=0,max_repeats=100,min_repeats=1) {standardGeneric("heatmap_plot")})

setMethod("heatmap_plot", "loxcode_experiment", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",style="ggplot", labels="sample", clustering="none",agglomeration="complete",min_reads=0,max_repeats=100,min_repeats=1){
  x = loxcode_experiment

  #clustering
  cluster_row = switch(clustering, "none"=FALSE, "row"=TRUE, "col"=FALSE, "both"=TRUE)
  cluster_col = switch(clustering, "none"=FALSE, "row"=FALSE, "col"=TRUE, "both"=TRUE)

  ##select only codes that are present in code_set
  m=x@count_matrixes[[count_matrix]]
  m=m[row.names(m)%in%x@code_sets[[code_set]]$code,]
  m[m<min_reads]=0
  m=m[rowSums(m)>0,]
  m=m[rowSums(m>0)<max_repeats,]
  m=m[rowSums(m>0)>=min_repeats,]
  m=m[,colSums(m)>0]
  #m=sweep(m,2,colSums(m),`/`)


  if(cluster_row==TRUE) {row.order <- hclust(dist(m),method=agglomeration)$order; m <- m[row.order, ] }
  if(cluster_col==TRUE) {col.order <- hclust(dist(t(m)),method=agglomeration)$order;  m <- m[, col.order]}

  if(cluster_row==FALSE &  cluster_col==FALSE & agglomeration =="binary")
  {
    binary=array()
    for(i in 1:nrow(m)) binary[i]=paste0(as.numeric((m>0)[i,]),collapse = "")
    m=m[order(binary,rowSums(m)),]
  }


  m$pos=1:nrow(m)
  m$code=row.names(m)
  mm=reshape::melt(m,id.vars=c("pos","code"))
  m=subset(m, select=-c(pos,code));


    mm[mm$value==0,]$value=NA
    labs <- switch(labels, "sample"=names(m), "alias"=lapply(names(m), get_alias, lox=x, set=count_matrix))
    ggplot(mm)+
      geom_tile(aes(pos,as.numeric(variable),fill=log10(value+1),group=code))+
      scale_fill_gradient2(midpoint = 0,high = "darkred",mid = "lightgray",name="log10(counts+1)")+
      theme_minimal()+
      xlab("")+
      ylab("samples")+xlab("loxcodes")+
      scale_x_continuous(expand = c(0, 0))+
      scale_y_continuous(breaks=1:length(labs),labels=labs,sec.axis = dup_axis(labels=colSums(m>0),name="# codes"))+
      theme(legend.position="top")

})


#' Generate bubble from count_matrix (TW/07/02/2019)
#' @param loxcode_experiment loxcode_experiment object
#' @export
setGeneric("bubble_plot", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",style="ggplot", labels="sample", clustering="none",agglomeration="complete",min_reads=0,max_repeats=100,min_repeats=1) {standardGeneric("bubble_plot")})

setMethod("bubble_plot", "loxcode_experiment", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",style="ggplot", labels="sample", clustering="none",agglomeration="complete",min_reads=0,max_repeats=100,min_repeats=1){
  x = loxcode_experiment

  #clustering
  cluster_row = switch(clustering, "none"=FALSE, "row"=TRUE, "col"=FALSE, "both"=TRUE)
  cluster_col = switch(clustering, "none"=FALSE, "row"=FALSE, "col"=TRUE, "both"=TRUE)

  ##select only codes that are present in code_set
  m=x@count_matrixes[[count_matrix]]
  m=m[row.names(m)%in%x@code_sets[[code_set]]$code,]
  m[m<min_reads]=0

  m=m[rowSums(m)>0,]
  m=m[rowSums(m>0)<max_repeats,]
  m=m[rowSums(m>0)>=min_repeats,]
  m=m[,colSums(m)>0]
  m=sweep(m,2,colSums(m),`/`)

  if(cluster_row==TRUE) {row.order <- hclust(dist(m),method=agglomeration)$order; m <- m[row.order, ] }
  if(cluster_col==TRUE) {col.order <- hclust(dist(t(m)),method=agglomeration)$order;  m <- m[, col.order]}

  if(cluster_row==FALSE &  cluster_col==FALSE & agglomeration =="binary")
  {
    binary=array()
    for(i in 1:nrow(m)) binary[i]=paste0(as.numeric((m>0)[i,]),collapse = "")
    m=m[order(binary,rowSums(m)),]
    print(table(binary))
  }

  m$index=1:nrow(m)
  m$code=row.names(m)
  mm=reshape::melt(m,id.var=c("index","code"))


  source("~/R/functions/randomcolor.R")

  labs <- switch(labels, "sample"=names(m), "alias"=lapply(names(m), get_alias, lox=x, set=count_matrix))

  ggplot()+
    geom_hline(aes(yintercept = as.vector(1:10 %o% 10^(0:-7))),color="lightgray", size=0.1,alpha=0.3)+
    scale_color_manual(values = unname(distinctColorPalette(nrow(m))),breaks=rownames(m),guide = FALSE)+
    geom_point(aes(factor(variable),index,color=factor(index),size=log(value)),data=mm,alpha=0.8)+
    theme(legend.position="none")+
    scale_radius(range = c(1,10))+
    theme_minimal()+
    theme(plot.title = element_text(size = 7),legend.position="none")+
    xlab("samples")+ylab("")+
    scale_x_discrete(labels=labs)+
    theme(
      panel.grid.major.x = element_blank() ,
      panel.grid.minor.y = element_blank() ,
      axis.text=element_text(size=10)
      #axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)
    )+coord_flip()

})



#' Read stats (TW/01/11/2019)
#' @param x loxcode_experiment object
#' @param count_matrix (not implemented yet)
#' @param code_set (not implemented yet)
#' @param plot specify which plot (size/complexity/both/ratio)
#' @param fill specify whether reads should be normalized in boxplots
#' @export
setGeneric("readstats_plot_old", function(x,count_matrix="all_samples",code_set="all_codes",plot="size",fill=TRUE, labels="sample") {standardGeneric("readstats_plot_old")})

setMethod("readstats_plot_old", "loxcode_experiment", function(x,count_matrix="all_samples",code_set="all_codes",plot="size", fill=TRUE, labels="sample"){

  pos="stack"; if(fill==TRUE) pos="fill"
  P=ggplot()

  if(plot == "size"){
  #iterating over samples and sizes and count # reads per condition
    d1=data.frame();
    for(i in x@samples){
      if (i@name %in% names(x@count_matrixes[[count_matrix]])){
        dd=i@decode@data;
        for(j in c(1:13)){
          ddd=data.frame(sample=name(i), size=j, count=sum(subset(dd,size==j)$count))
          d1=plyr::rbind.fill(d1,ddd);
          print(ddd)
        }
      }
    }
    d1=subset(d1,count>0)
    P=(ggplot(d1)+geom_bar(aes(fill=factor(size), y=count, x=sample),position=pos, stat="identity")+
      scale_fill_discrete(name="size")+
      theme_minimal())
  }

  if(plot == "complexity"){
  #iterating over samples and complexity and count # reads per condition
    d2=data.frame();
    for(i in x@samples){
      if (i@name %in% names(x@count_matrixes[[count_matrix]])) {
        dd=i@decode@data;
        for(j in c(0:15)){
          ddd=data.frame(sample=name(i), dist_orig=j, count=sum(subset(dd,dist_orig==j)$count))
          d2=plyr::rbind.fill(d2,ddd);
        }
      }
    }
    d2=subset(d2,count>0)
    P=(ggplot(d2)+geom_bar(aes(fill=factor(dist_orig), y=count, x=sample),position=pos, stat="identity")+
      scale_fill_discrete(name="complexity")+
      theme_minimal())
  }

  ## plotting reads for both complexity and size
  if(plot == "both"){
    #iterating over sizes and complexities
    d3=data.frame();
    for(i in x@samples){
      #if (i@name %in% names(x@count_matrixes[[count_matrix]])) {
        dd=i@decode@data;
        for(j in c(0:15))for(k in c(1:13)){
          ddd=data.frame(sample=name(i), dist_orig=j, size=k, reads=sum(subset(dd,dist_orig==j & size==k)$count))
          d3=plyr::rbind.fill(d3,ddd);
        }
      #}
    }

    d3=subset(d3,reads>0)
    P=(ggplot(d3)+facet_wrap(~sample)+geom_point(aes(factor(size),factor(dist_orig), size=reads, color=(reads)))+
      scale_fill_discrete(name="complexity")+
      theme_minimal()+xlab("size")+ylab("complexity")+
      scale_size_area()+scale_color_gradient(low = "blue", high = "red"))
  }


  ## plotting ratio barcodes/reads
  if(plot == "ratio"){
    #iterating over sizes and complexities
    d4=data.frame();
    for(i in x@samples){
      if (i@name %in% names(x@count_matrixes[[count_matrix]])) {
        dd=i@decode@data;
        for(j in c(0:15))for(k in c(1:13)){
          ddd=data.frame(sample=name(i), dist_orig=j, size=k, ratio=nrow(subset(dd,dist_orig==j & size==k))/sum(subset(dd,dist_orig==j & size==k)$count))
          d4=plyr::rbind.fill(d4,ddd);
        }
      }
    }

    d4=subset(d4,ratio>0)
    P=(ggplot(d4)+facet_wrap(~sample)+geom_point(aes(factor(size),factor(dist_orig), size=ratio, color=(ratio)))+
      scale_fill_discrete(name="complexity")+
      theme_minimal()+xlab("size")+ylab("complexity")+
      scale_size_area()+scale_color_gradient(low = "blue", high = "red"))
  }

  P
})

# ggplot()+geom_histogram(aes(NN167@samples[["N701-N505"]]@decode@data$size), stat="count")

#' readstats plot
#'
#' @param loxcode_experiment loxcode_experiment object
#' @param count_matrix
#' @param code_set
#' @param plot specify which plot (size/complexity/both/ratio)
#' @param fill specify whether reads should be normalized in boxplots
#' @export
setGeneric("readstats_plot", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",plot="size",fill=TRUE, labels="alias") {standardGeneric("readstats_plot")})

setMethod("readstats_plot", "loxcode_experiment", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",plot="size", fill=TRUE, labels="sample"){
  x = loxcode_experiment
  # graph config
  pos = "stack"
  if (fill==TRUE) { pos = "fill" }

  # initialize
  p = ggplot()
  counts = x@count_matrixes[[count_matrix]]
  codes = x@code_sets[[code_set]]
  samples = names(counts)

  ### SIZE PLOT
  if (plot == "size"){

    sizeBySample = data.frame()
    # create size count table
    for (i in 1:length(samples)) {
      for (j in c(1:13)){
        row = data.frame(sample=samples[[i]], alias=get_alias(x, count_matrix, samples[[i]]), size=j, count=0)
        sizeBySample = plyr::rbind.fill(sizeBySample, row)
      }
    }

    # fill size count table
    for (i in 1:nrow(counts)) {
      row = counts[i, ]
      code = rownames(row)
      if (code %in% codes$code) {
        size = length(gregexpr(" ", code)[[1]]) + 1 # one more than number of spaces
        for (j in names(row)) {
          curr = row[[j]]
          index = which(sizeBySample$sample==j & sizeBySample$size==size)
          prev = sizeBySample$count[[index]]
          sizeBySample$count[[index]] = curr + prev
        }
      }
    }

    sizeBySample = subset(sizeBySample, count>0)
    p = ggplot(sizeBySample) +
      geom_bar(aes(fill=factor(size), y=count, x=switch(labels, "alias"=alias, "sample"=sample)), position=pos, stat="identity") +
      scale_fill_discrete(name="size") +
      labs(x="sample")
  }

  ### COMPLEXITY PLOT
  if (plot=="complexity") {

    # create complexity count table
    complexityBySample = data.frame()
    for (i in 1:length(samples)) {
      for (j in c(0:15)){
        row = data.frame(sample=samples[[i]], alias=get_alias(x, count_matrix, samples[[i]]), dist_orig=j, count=0)
        complexityBySample = plyr::rbind.fill(complexityBySample, row)
      }
    }

    # fill complexity count table
    for (i in 1:nrow(counts)) {
      row = counts[i, ]
      code = rownames(row)
      if (code %in% codes$code) {
        dist = codes$dist_orig[[match(code, codes$code)]]
        if (is.na(dist)) { dist = 0 }
        for (j in names(row)) {
          curr = row[[j]]
          index = which(complexityBySample$sample==j & complexityBySample$dist_orig==dist)
          prev = complexityBySample$count[[index]]
          complexityBySample$count[[index]] = curr + prev
        }
      }
    }

    complexityBySample = subset(complexityBySample, count>0)
    p = ggplot(complexityBySample) +
      geom_bar(aes(fill=factor(dist_orig), y=count, x=switch(labels, "alias"=alias, "sample"=sample)), position=pos, stat="identity") +
      scale_fill_discrete(name="complexity") +
      labs(x="sample")
  }

  ### SIZE COMPLEXITY PLOT
  if (plot=="both") {

    # create counts table
    bothBySample = data.frame()
    for (i in 1:length(samples)) {
      for (j in 0:15) for (k in 1:13) {
        row = data.frame(sample=samples[[i]], alias=get_alias(x, count_matrix, samples[[i]]), dist_orig=j, size=k, count=0)
        bothBySample = plyr::rbind.fill(bothBySample, row)
      }
    }

    # fill counts table
    for (i in 1:nrow(counts)) {
      row = counts[i, ]
      code = rownames(row)
      if (code %in% codes$code) {
        size = length(gregexpr(" ", code)[[1]]) + 1 # one more than number of spaces
        dist = codes$dist_orig[[match(code, codes$code)]]
        if (is.na(dist)) { dist = 0 }
        for (j in names(row)) {
          curr = row[[j]]
          index = which(bothBySample$sample==j & bothBySample$dist_orig==dist & bothBySample$size==size)
          prev = bothBySample$count[[index]]
          bothBySample$count[[index]] = curr + prev
        }
      }
    }

    bothBySample = subset(bothBySample, count>0)
    p = (ggplot(bothBySample) + facet_wrap(~switch(labels, "alias"=alias, "sample"=sample)) + geom_point(aes(factor(size), factor(dist_orig), size=count, color=count)) +
         scale_fill_discrete(name="complexity") +
         theme_minimal() + xlab("size") + ylab("complexity") +
         scale_size_area() + scale_color_gradient(low="blue", high="red"))
  }

  ### RATIO PLOT
  if (plot=="ratio") {

    # create ratio table
    ratioBySample = data.frame()
    for (i in 1:length(samples)) {
      for (j in 0:15) for (k in 1:13) {
        row = data.frame(sample=samples[[i]], alias=get_alias(x, count_matrix, samples[[i]]), dist_orig=j, size=k, ratio=0, ncodes=0, count=0)
        ratioBySample = plyr::rbind.fill(ratioBySample, row)
      }
    }

    # fill ratio table
    for (i in 1:nrow(counts)) {
      row = counts[i, ]
      code = rownames(row)
      if (code %in% codes$code) {
        size = length(gregexpr(" ", code)[[1]]) + 1 # one more than number of spaces
        dist = codes$dist_orig[[match(code, codes$code)]]
        if (is.na(dist)) { dist = 0 }
        for (j in names(row)) {
          curr = row[[j]]
          index = which(ratioBySample$sample==j & ratioBySample$dist_orig==dist & ratioBySample$size==size)
          prev = ratioBySample$count[[index]]
          ratioBySample$count[[index]] = curr + prev
          ratioBySample$ncodes[[index]] = ratioBySample$ncodes[[index]] + 1
          ratioBySample$ratio[[index]] = ratioBySample$ncodes[[index]] / ratioBySample$count[[index]]
        }
      }
    }

    ratioBySample$count <- NULL
    ratioBySample$ncodes <- NULL
    ratioBySample = subset(ratioBySample, ratio>0)
    ratioBySample = subset(ratioBySample, ratio!=Inf)

    p = (ggplot(ratioBySample) + facet_wrap(~switch(labels, "alias"=alias, "sample"=sample)) + geom_point(aes(factor(size),factor(dist_orig), size=ratio, color=(ratio)))+
         scale_fill_discrete(name="complexity")+
         theme_minimal()+xlab("size")+ylab("complexity")+
         scale_size_area()+scale_color_gradient(low = "blue", high = "red"))
  }

  return (p)
})


#' sample comparison pie
#' @param x loxcode_experiment object
#' @param count_matrix
#' @param code_set
#' @param scale scale the size of pies
#' @param labels by sample or alias
#' @export
setGeneric("sample_comparison_pie", function(x,count_matrix="all_samples",code_set="all_codes", scale=1, labels="sample") {standardGeneric("sample_comparison_pie")})

setMethod("sample_comparison_pie", "loxcode_experiment", function(x,count_matrix="all_samples",code_set="all_codes", scale=1, labels="sample"){

  M=x@count_matrixes[[count_matrix]]
  M=M[row.names(M)%in%x@code_sets[[code_set]]$code,]

  m=data.frame();

  for(i in 1:ncol(M))for(j in 1:ncol(M)){
    both=sum(M[,i]>0 & M[,j]>0,na.rm=TRUE)
    in.1=sum(M[,i]>0 & M[,j]==0,na.rm=TRUE)
    in.2=sum(M[,j]>0 & M[,i]==0,na.rm=TRUE)
    d=data.frame(sample1=i, sample2=j,both=both,in.1=in.1,in.2=in.2,stringsAsFactors = FALSE)
    m=plyr::rbind.fill(m,d)
  }
  library('scatterpie')

  sample_names = names(x@count_matrixes[[count_matrix]])
  labels = switch(labels, "sample"=sample_names, "alias"=lapply(sample_names, get_alias, lox=x, set=count_matrix))

  ggplot(m)+
    scatterpie::geom_scatterpie(aes(x=sample1,y=sample2,r=(both+in.1+in.2)/(1200/scale)),data=m, cols=c(3:5),size=0.1)+
    scale_x_continuous(breaks=unique(m$sample1),labels=labels)+
    scale_y_continuous(breaks=unique(m$sample2),labels=labels)+
    theme_bw()+theme(axis.text.x=element_text(angle=45, hjust=1))+
    scale_fill_discrete(name="")

})

#' saturation plot (TW/05/11/2019)
#' @param loxcode_experiment loxcode_experiment object
#' @param loxcode_sample which sample to use
#' @param code_set which code set to use
#' @export
setGeneric("saturation_plot", function(loxcode_experiment, loxcode_sample, code_set="all_codes") {standardGeneric("saturation_plot")})

setMethod("saturation_plot", "loxcode_experiment", function(loxcode_experiment, loxcode_sample, code_set="all_codes"){
  x = loxcode_experiment
  i = x@samples[[loxcode_sample]]
  cs=i@decode@data;
  cs=subset(cs,code%in%x@code_sets[[code_set]]$code)
  cs=cs[order(cs$firstread),]
  cs$index=c(1:nrow(cs))
  new = data.frame(firstread = cs$firstread, index = cs$index, dist_orig=cs$dist_orig)

  g <- ggplot(new)+geom_step(aes(firstread,index,color=factor(dist_orig)))+geom_point(aes(firstread,index,color=factor(dist_orig)))
  # g <- ggplot(new) + geom_line(aes(firstread1,index1,color=dist_orig1)) + geom_line(aes(firstread2,index2,color=dist_orig2))+scale_colour_manual()
  return (g)
})

#' Multiple lined saturation plot
#'
#' @param loxcode_experiment loxcode_experiment object
#' @param loxcode_samples list of loxcode samples
#' @param code_sets list of code_sets to use
#' @param colorby colour by dist_orig or sample
#' @export

setGeneric("saturation_multi", function(loxcode_experiment, loxcode_samples, codesets, colorby="dist_orig") {standardGeneric("saturation_multi")})

setMethod("saturation_multi", "loxcode_experiment", function(loxcode_experiment, loxcode_samples, codesets, colorby="dist_orig") {
  x = loxcode_experiment
  data = data.frame()

  # create the data.frame
  for (i in 1:length(loxcode_samples)) {
    s = loxcode_samples[[i]]
    c = codesets[[i]]
    sample = x@samples[[s]]
    counts = sample@decode@data
    counts$dist_orig[is.na(counts$dist_orig)] = 0
    counts = subset(counts, counts$code %in% x@code_sets[[codesets[[i]]]]$code)
    counts = counts[order(counts$firstread), ]
    counts$index = c(1:nrow(counts))
    one_sample_data = data.frame(firstread = counts$firstread,
                                 index = counts$index,
                                 dist_orig = counts$dist_orig,
                                 sample = s)
    colour = switch(colorby, "dist_orig" = one_sample_data$dist_orig, "sample" = one_sample_data$sample)

    if (i==1) {
      g <- ggplot(one_sample_data, aes(firstread, index)) +
        geom_line(data=one_sample_data) +
        geom_point(data=one_sample_data, aes(firstread, index, colour = switch(colorby, "dist_orig" = factor(dist_orig), "sample" = sample)))+
        scale_color_discrete(name="")

    }
    else {
      g <- g +
        geom_line(data=one_sample_data) +
        geom_point(data=one_sample_data, aes(firstread, index, colour = switch(colorby, "dist_orig" = factor(dist_orig), "sample" = sample)))
    }
  }

  return(g)
})

#' 2-way Venn diagram
#'
#' Plot 2-way Venn diagram showing common and distinct barcodes between two given samples in an experiment using VennDiagram package
#' @param x loxcode_experiment object
#' @param a sample A name
#' @param b sample B name
#' @param size_range range of loxcode sizes to show in Venn diagram. Both lower and upper bounds are inclusive
#' @param dist_range range of dist_orig values to show in Venn diagram. Both lower and upper bounds are inclusive
#' @param labels optional list of custom labels for samples A, B, otherwise sample names for A, B are used.
#' @return grobTree containing the 2-way Venn diagram graphic
#' @export
setGeneric("venn_2way", function(x, a, b, size_range, dist_range, ...){standardGeneric("venn_2way")})

setMethod("venn_2way", "loxcode_experiment", function(x, a, b, size_range, dist_range, labels = NA){
  samp_names <- c(a, b)
  samp <- as.list(samp_names)

  if(!is.na(labels)){
    samp_names <- labels
  }

  samp <- lapply(samp, function(s){
    return(loxcoder::get_valid(x, s) %>%
             filter(size >= size_range[1] &
                      size <= size_range[2] &
                      dist_orig >= dist_range[1] &
                      dist_orig <= dist_range[2]))
  })

  v <- draw.pairwise.venn(area1 = nrow(samp[[1]]),
                          area2 = nrow(samp[[2]]),
                          cross.area = length(intersect(samp[[1]]$code, samp[[2]]$code)),
                          category = samp_names,
                          fontfamily = rep('sans', 3),
                          cat.fontfamily = rep('sans', 2),
                          fill = c('red', 'blue'))
  return(grobTree(children = v))
})

#' 3-way Venn diagram
#'
#' Plot 3-way Venn diagram showing common and distinct barcodes between two given samples in an experiment using VennDiagram package
#' @param x loxcode_experiment object
#' @param a sample A name
#' @param b sample B name
#' @param c sample C name
#' @param size_range range of loxcode sizes to show in Venn diagram. Both lower and upper bounds are inclusive
#' @param dist_range range of dist_orig values to show in Venn diagram. Both lower and upper bounds are inclusive
#' @param labels optional list of custom labels for samples A, B, C, otherwise sample names for A, B, C are used.
#' @return grobTree containing the 3-way Venn diagram graphic
#' @export
#' @export
setGeneric("venn_3way", function(x, a, b, c, size_range, dist_range, ...){standardGeneric("venn_3way")})

setMethod("venn_3way", "loxcode_experiment", function(x, a, b, c, size_range, dist_range, labels = NA){
  samp_names <- c(a, b, c)
  samp <- as.list(samp_names)
  names(samp) <- samp_names

  if(!is.na(labels)){
    samp_names <- labels
  }

  samp <- lapply(samp, function(s){ return(loxcoder::get_valid(x, s) %>%
                                             filter(size >= size_range[1] &
                                                      size <= size_range[2] &
                                                      dist_orig >= dist_range[1] &
                                                      dist_orig <= dist_range[2]))} )

  pairs <- list("n12" = c(1, 2), "n23" = c(2, 3), "n13" = c(1, 3));

  n_pairs <- lapply(pairs, function(x){
    length(intersect(samp[[x[1]]]$code, samp[[x[2]]]$code))
  })

  n_all <- length(intersect(samp[[1]]$code, intersect(samp[[2]]$code, samp[[3]]$code)))

  v <- draw.triple.venn(area1 = nrow(samp[[1]]),
                        area2 = nrow(samp[[2]]),
                        area3 = nrow(samp[[3]]),
                        n12 = n_pairs$n12,
                        n23 = n_pairs$n23,
                        n13 = n_pairs$n13,
                        n123 = n_all,
                        category = samp_names,
                        fontfamily = rep('sans', 7),
                        cat.fontfamily = rep('sans', 3),
                        fill = c('red', 'blue', 'green'))
  return(grobTree(children = v))
})

#' sample table
#'
#' Tabulates the barcode counts, total number of reads, maximum complexity and consensus filtered for each sample in an experiment
#' @param x loxcode_experiment object
#' @param c count_matrix of samples to display
#' @export
setGeneric("sample_table", function(x, c) {standardGeneric("sample_table")})

setMethod("sample_table", "loxcode_experiment", function(x, c){
  d=data.frame()
  for (i in 1:length(x@samples)) {
    curr_sample = x@samples[[i]]
    if (name(curr_sample) %in% names(x@count_matrixes[[c]])){
      temp=data.frame(
        "Sample_Name"= name(curr_sample),
        curr_sample@meta,
        stringsAsFactors = FALSE)
      d=plyr::rbind.fill(d,temp)
    }
  }
  return(d)
})

strCap <- function(y) {
  c <- strsplit(y, " ")[[1]]
  paste(toupper(substring(c, 1,1)), substring(c, 2),
        sep="", collapse=" ")
}

capitalize <- function(w) {
  cap = c()
  for (word in w) {
    cap = append(cap, strCap(word))
  }
  return (cap)
}

#' summary table
#'
#' Tabulates the barcode counts, total number of reads, maximum complexity and consensus filtered for each sample in an experiment
#' @param x loxcode_experiment object
#' @param c count_matrix of samples to display
#' @export
setGeneric("summary_table2", function(x, c="all_samples") {standardGeneric("summary_table2")})

setMethod("summary_table2", "loxcode_experiment", function(x, c="all_samples"){

  d=data.frame()

  if (!length(x@count_matrixes[[c]])) { return() }

  # check if samples have been collapsed
  collapsed = TRUE
  if (length(intersect(names(x@samples), names(x@count_matrixes[[c]]))) != 0){
    collapsed = FALSE
  }

  # extract data from loxcode experiment
  if (collapsed == FALSE) {
    for (i in 1:length(x@samples)) {
      curr_sample = x@samples[[i]]
      metadata = curr_sample@meta
      names(metadata) <- capitalize(names(metadata))
      if (name(curr_sample) %in% names(x@count_matrixes[[c]])){
        temp=data.frame("Sample_Name" = name(curr_sample),
                        "Alias" = x@alias[[c]]$alias[[match(name(curr_sample), x@alias[[c]]$sample_name)]],
                        metadata,
                        "Barcode_Count" = length(curr_sample@decode@data$count),
                        "Invalid_Count" = length(curr_sample@decode@data$count) - sum(curr_sample@decode@data$is_valid, na.rm=FALSE),
                        "Number_of_Reads" = curr_sample@decode_stats$tot_reads,
                        "Max_Complexity" = max(na.omit(curr_sample@decode@data$dist_orig)),
                        "Consensus_Filtered" = curr_sample@decode_stats$consensus_filtered,
                        "Percent_Filtered" = round(100*curr_sample@decode_stats$consensus_filtered/curr_sample@decode_stats$tot_reads,2),
                        stringsAsFactors = FALSE)
        d = plyr::rbind.fill(d,temp)
      }
    }
  }
  else {
    counts = x@count_matrixes[[c]]
    all_codes = x@code_sets[["all_codes"]]
    invalid_codes = x@code_sets[["invalid_codes"]]
    meta = get_collapsed_meta(x, c)
    for (i in 1:ncol(counts)) {
      curr = names(counts)[i]
      barcodes = row.names(counts[which (counts[[curr]]>0), ])
      metadata = meta[curr, ]
      names(metadata) <- capitalize(names(meta))
      temp = data.frame("Sample_Name" = curr,
                        "Alias" = x@alias[[c]]$alias[[match(curr, x@alias[[c]]$sample_name)]],
                        metadata,
                        "Barcode_Count" = sum(counts[,i]>0),
                        "Invalid_Count" = sum(barcodes %in% row.names(invalid_codes)),
                        "Number_of_Reads" = round(sum(counts[,i]), 0),
                        "Max_Complexity" = max(all_codes[which (all_codes$code %in% barcodes), ]$dist_orig, na.rm=TRUE),
                        stringsAsFactors = FALSE)
      d = plyr::rbind.fill(d, temp)
    }
    names(d)[names(d) == "metadata"] = names(meta)
  }


  return(d)
})

#' Summary table v2
#'
#' @param lox loxcode_experiment object
#' @param s sample set
#' @return table of sample information
#' @export
setGeneric("summary_table", function(lox, s="all_samples") {standardGeneric("summary_table")})

setMethod("summary_table", "loxcode_experiment", function(lox, s="all_samples") {
  counts = lox@count_matrixes[[s]]
  aliases = lox@alias[[s]]
  all_codes = lox@code_sets[["all_codes"]]
  invalid_codes = lox@code_sets[["invalid_codes"]]
  meta = get_collapsed_meta(lox, s)
  d = data.frame()
  for (i in 1:ncol(counts)) {
    curr = names(counts)[i]
    barcodes = row.names(counts[which (counts[[curr]]>0), ])
    metadata = meta[curr, ]

    if (curr %in% names(lox@samples)) {
      stats = lox@samples[[curr]]@decode_stats
      nreads = stats$tot_reads
      cfiltered = stats$consensus_filtered
      pfiltered = round(100 * cfiltered / nreads, 2)
    } else {
      nreads = NA
      cfiltered = round(sum(counts[,i]), 0)
      pfiltered = NA
    }

    temp = data.frame("Sample_Name" = curr,
                      "Alias" = aliases$alias[match(curr, aliases$sample_name)],
                      metadata,
                      "Barcode_Count" = sum(counts[,i]>0),
                      "Invalid_Count" = sum(barcodes %in% row.names(invalid_codes)),
                      "Number_of_Reads" = nreads,
                      "Max_Complexity" = max(all_codes[which (all_codes$code %in% barcodes), ]$dist_orig, na.rm=TRUE),
                      "Consensus_Filtered" = cfiltered,
                      "Percent_Filtered" = pfiltered,
                      stringsAsFactors = FALSE)
    d = plyr::rbind.fill(d, temp)
  }
  d$Max_Complexity = replace(d$Max_Complexity, is.infinite(d$Max_Complexity), 0)
  return(d)
})

#' codeset table
#'
#' Tabulates the codes in a codeset
#' @param x loxcode_experiment object
#' @param n name of codeset to display
#' @return dataframe of codeset information
#' @export
setGeneric("codeset_table", function(x, n) {standardGeneric("codeset_table")})

setMethod("codeset_table", "loxcode_experiment", function(x, n){
  table = x@code_sets[[n]]
  names(table) <- capitalize(names(table))
  return(table)
})

#' experiments table
#'
#' Presents a list of loxcode_experiments in table form
#' @param exp list of loxcode_experiment objects
#' @return a data.frame containing the information
#' @export
setGeneric("exp_table", function(exp) {standardGeneric("exp_table")})

setMethod("exp_table", "list", function(exp) {
  d = data.frame()

  if (length(exp)>0) {
    for (i in 1:length(exp)){
      directories = ""
      curr = exp[[i]]
      for (dir in curr@dir) {
        directories = paste(directories, dir)
      }
      temp = data.frame("Experiment_Name"=curr@name,
                        "Directory"=directories,
                        "Samples"=length(curr@samples),
                        "Code_Sets"=length(curr@code_sets),
                        "Sample_Sets"=length(curr@count_matrixes),
                        stringsAsFactors=FALSE)
      d = plyr::rbind.fill(d, temp)
    }
  }
  return(d)
})

#' Code frequency table
#'
#' Creates a data.frame of the frequency for each size and complexity combination
#' @param x loxcode_experiment object
#' @param s sample set of independent samples
#' @param c code_set
#' @return data.frame of code frequencies
#' @export
setGeneric("code_freq_table", function(x, s="all_samples", c="all_codes") {standardGeneric("code_freq_table")})

setMethod("code_freq_table", "loxcode_experiment", function(x, s="all_samples", c="all_codes") {

  codeset = x@code_sets[[c]]
  counts = x@count_matrixes[[s]]
  Y = data.frame()

  if (!length(counts)) { return() } # prevents LoxCodeR from crashing
  #if (has_warning(max(codeset$dist_orig, na.rm=TRUE))) {print("crash prevented"); return() }

  # create a column in code sets recording frequency of barcodes
  index = match(row.names(counts),codeset$code)
  index = subset(index, !is.na(index))
  codeset$rep[index] = rowSums(counts>0)

  # create a data.frame with repetition frequencies for each size and complexity
  for(i in c(1:max(codeset$size, na.rm=TRUE))) for(j in c(1:max(codeset$dist_orig, na.rm=TRUE))){
    y=data.frame(size=i,dist_orig=j)
    z=as.data.frame(table(subset(codeset, size==i & dist_orig==j)$rep),stringsAsFactors = FALSE)
    if(nrow(z)==0) next;
    n=z$Var1;
    z=as.data.frame(z[,-1]) ##to-do: find a way to order count entries
    y=cbind(y,sum(z),t(z))
    names(y)=c("size","dist_orig","radius",n)
    Y=plyr::rbind.fill(Y,y);
  }
  Y[is.na(Y)]=0

  return(Y)
})

#' Filtered Code freq table
#'
#' Data.frame of filtered codes specified by user parameters
#' @param x loxcode_experiment object
#' @param s independent sample set
#' @param c code set
#' @param t tolerance threshold for repeated proportions
#' @param m maximum repeats tolerated
#' @return scatterpie of filtered codes
#' @export
setGeneric("filtered_codes_table", function(x, s="all_samples", c="all_codes", t=0.05, m=2) {standardGeneric("filtered_codes_table")})

setMethod("filtered_codes_table", "loxcode_experiment", function(x, s="all_samples", c="all_codes", t=0.05, m=2) {
  # data-frame of repetition frequencies
  Y = code_freq_table(x, s, c)

  if (!length(x@code_sets[[c]]) | !length(x@count_matrixes[[s]])) { print("errorrrr"); return() }

  YY = data.frame()
  total = max(as.numeric(names(Y[,!names(Y)%in%c("size", "dist_orig", "radius")])))
  for (i in 1:nrow(Y)) {
    row = Y[i,]
    proportion = sum(row[as.character(seq(m, total))] / sum(row[as.character(seq(1, total))]))
    if (proportion*100<=t){
      YY = plyr::rbind.fill(YY, row)
    }
  }
  return(YY)
})

#' Code frequency pie
#'
#' Display the frequency proportions of codes of every permutation of size and complexity
#'
#' @param x loxcode_experiment object
#' @param s sample set of independent samples
#' @param c code_set
#' @return scatterpie plot
#' @export
setGeneric("code_frequency_pie", function(x, s="all_samples", c="all_codes") {standardGeneric("code_frequency_pie")})

setMethod("code_frequency_pie", "loxcode_experiment", function(x, s="all_samples", c="all_codes") {

  Y = code_freq_table(x, s, c)

  # scatterpie plot
  g <- ggplot(Y)+
    geom_scatterpie(aes(x=(size), y=(dist_orig), r=log10(radius)/10),data=Y,cols=c(4:ncol(Y)),size=0.1)+
    scale_x_continuous(breaks=c(1,3,5,7,9,11,13))+scale_y_continuous(breaks=c(1:10))+
    theme_bw()+scale_fill_discrete(name="Repeats")

  return(g)
})

#' Filtered code frequency pie
#'
#' Filters out codes that have an overlap between independent samples
#' @param x loxcode_experiment object
#' @param s independent sample set
#' @param c code set
#' @param t tolerance threshold for repeated proportions
#' @param m maximum repeats tolerated
#' @return scatterpie of filtered codes
#' @export
setGeneric("filtered_codes_pie", function(x, s="all_samples", c="all_codes", t=0.05, m=2) {standardGeneric("filtered_codes_pie")})

setMethod("filtered_codes_pie", "loxcode_experiment", function(x, s="all_samples", c="all_codes", t=0.05, m=2) {
  Y = code_freq_table(x, s, c)
  YY = filtered_codes_table(x, s, c, t, m)

  g <- ggplot(YY)+
    geom_scatterpie(aes(x=(size), y=(dist_orig), r=log10(radius)/5),data=YY,cols=c(4:ncol(YY)),size=0.1)+
    scale_x_continuous(breaks=c(1,3,5,7,9,11,13))+scale_y_continuous(breaks=c(1:max(YY$dist_orig)+1))+
    theme_bw()+scale_fill_discrete(name="Repeats")

  return(g)
})

#' Get sample alias
#'
#' @param lox loxcode_experiment object
#' @param set sample set name
#' @param name sample_name
#' @return sample alias
#' @export
setGeneric("get_alias", function(lox, set, name) {standardGeneric("get_alias")})

setMethod("get_alias", "loxcode_experiment", function(lox, set, name) {
  aliases = lox@alias[[set]]
  index = match(name, aliases$sample_name)
  alias = NA
  if (!is.na(index)) {
    alias = aliases$alias[[index]]
  }
  return (alias)
})

#' Get sample name from alias
#'
#' @param lox loxcode_experiment object
#' @param set sample set name
#' @param name alias
#' @return sample alias
#' @export
setGeneric("get_samplename", function(lox, set, name) {standardGeneric("get_samplename")})

setMethod("get_samplename", "loxcode_experiment", function(lox, set, name) {
  aliases = lox@alias[[set]]
  index = match(name, aliases$alias)
  samplename = NA
  if (!is.na(index)) {
    samplename = aliases$sample_name[[index]]
  }
  return (samplename)
})
jngwehi/loxcodeR documentation built on March 17, 2020, 5:32 p.m.