#' Plots distribution of loxcode sizes found in sample
#'
#' @param lox loxcode_experiment object
#' @param sample loxcode_sample object
#' @param count_matrix count_matrix
#' @param code_set code_set
#' @param labels sample name or alias
#' @export
setGeneric("size_plot", function(lox, sample, count_matrix="all_samples", code_set="all_codes", labels="alias") {standardGeneric("size_plot")})
setMethod("size_plot", "loxcode_experiment", function(lox, sample, count_matrix="all_samples", code_set="all_codes", labels="alias"){
counts = lox@count_matrixes[[count_matrix]]
counts$codes = row.names(counts)
counts = subset(counts, counts[, sample] > 0)
codes = lox@code_sets[[code_set]]
data = subset(codes, code %in% counts$codes)
title = switch(labels, "sample" = sample, "alias" = get_alias(lox, count_matrix, sample))
g = ggplot(data) + geom_bar(aes(as.factor(size), fill = as.factor(is_valid)), position = "stack", stat = "count") +
xlab("size") + ylab("diversity") +
ggtitle(title)
return(g)
})
#' Plots distribution of dist_orig found in sample
#'
#' @param lox loxcode_experiment object
#' @param sample loxcode_sample object
#' @param count_matrix count_matrix
#' @param code_set code_set
#' @param labels sample name or alias
#' @export
setGeneric("dist_orig_plot", function(lox, sample, count_matrix="all_samples", code_set="all_codes", labels="alias") {standardGeneric("dist_orig_plot")})
setMethod("dist_orig_plot", "loxcode_experiment", function(lox, sample, count_matrix="all_samples", code_set="all_codes", labels="alias"){
# x = loxcode_sample
# u <- valid(x)
# #u <- u[u$size == size, ]
# fill_scale <- scale_fill_manual(breaks = 0:15, values = rep('blue', 16)) # use twice since gradient is hard to see otherwise
# g <- ggplot(data = u) + facet_wrap(~size) + geom_bar(aes(x = dist_orig, fill = factor(dist_orig)), show.legend = F) + scale_x_continuous(breaks = 0:10, limits = c(0, 10)) +
# xlab("Distance from origin") + ylab("Diversity") + ggtitle(loxcoder::name(x))
counts = lox@count_matrixes[[count_matrix]]
counts$codes = row.names(counts)
counts = subset(counts, counts[, sample] > 0)
codes = lox@code_sets[[code_set]]
codes[is.na(codes)] = 0
data = subset(codes, code %in% counts$codes)
title = switch(labels, "sample" = sample, "alias" = get_alias(lox, count_matrix, sample))
g = ggplot(data) + facet_wrap(~size) + geom_bar(aes(dist_orig, fill = factor(dist_orig)), show.legend = FALSE) +
scale_x_continuous(breaks = 0:10, limits = c(0, 10)) +
xlab("Distance from origin") + ylab("diversity") +
ggtitle(title)
return(g)
})
#' Produce rank-count plot
#'
#' In the resulting plot, barcodes of specified size are ranked by read count. log10(count) is then plotted
#' against barcode rank in black. In addition, dist_orig for each barcode is shown in red with the same horizontal axis.
#'
#' @param x loxcode_sample object
#' @param size loxcode size to consider
#' @param ymax max y-value for counts
#' @param dmax max d-value for dist_orig
#' @export
setGeneric("rank_count_plot", function(x, size, ymax, dmax) {standardGeneric("rank_count_plot")})
setMethod("rank_count_plot", "loxcode_sample", function(x, size, ymax, dmax){
u <- valid(x)
u <- u[u$size == size, ]
u <- u[order(u$count, decreasing = T), ]
# u$count <- u$count/sum(u$count) # use frequency instead of raw count
ggplot(data = u) + geom_point(aes(x = 1:nrow(u), y = log10(count))) +
geom_point(aes(x = 1:nrow(u), y = dist_orig*ymax/dmax), color = 'red') +
scale_x_log10(breaks = 1:10, labels = u$code[1:10]) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
xlab('code') + ylab('log10(count)') +
ggtitle(sprintf('%s; size = %d', loxcoder::name(x), size)) +
scale_y_continuous(limits = c(0, ymax), sec.axis = sec_axis(~.*(dmax/ymax), name = 'distance'))
})
#' Produce pair comparison scatterplot
#'
#' For each barcode in x1, x2, read counts in each sample are plotted on a log-scale, after adjusting sample x2 to match the
#' total read count (for valid barcodes) in sample x1. Barcodes present in one sample and not the other are shown
#' horizontally and vertically (below 0) using beeswarm plots
#'
#' @param lox loxcode_experiment object
#' @param set sample set
#' @param codeset code set
#' @param x1 loxcode_sample object for sample 1
#' @param x2 loxcode_sample object for sample 2
#' @param range c(lower, upper), specifying the range of dist_orig values to consider. Both bounds are inclusive.
#'
#' @export
setGeneric("pair_comparison_plot", function(lox, set, codeset="all_codes", x1, x2, range, ...) {standardGeneric("pair_comparison_plot")})
setMethod("pair_comparison_plot", "loxcode_experiment", function(lox, set, codeset="all_codes", x1, x2, range, plot="size", labels = "alias"){
u <- get_comparison_table(x1, x2, plot)
u <- u[u$code %in% lox@code_sets[[codeset]]$code,]
name1 = switch(labels, "alias" = get_alias(lox, set, x1@name), "sample" = x1@name)
name2 = switch(labels, "alias" = get_alias(lox, set, x2@name), "sample" = x2@name)
nonzero_mask <- (u$rep1_count > 0 & u$rep2_count > 0)
rep1_zero_mask <- (u$rep1_count == 0)
rep2_zero_mask <- (u$rep2_count == 0)
theme_update(plot.title = element_text(hjust = 0.5))
if (plot=="size") {
g <- ggplot() +
geom_point(aes(y = log10(1 + u$rep1_count[nonzero_mask]) , x = log10(1 + u$rep2_count[nonzero_mask]),
color = as.factor(u$size[nonzero_mask])), alpha = 1) +
geom_abline(alpha=0.75) + ylab(paste('log10(reads in ', name1, ')')) + xlab(paste('log10(reads in ', name2, ')')) +
ggtitle(paste(name1, 'vs', name2)) + scale_color_discrete(name="Size")
if (name1!=name2) {
g <- g +
geom_quasirandom(aes(y = rep(-0.2, sum(rep1_zero_mask)), x = log10(1 + u$rep2_count[rep1_zero_mask]), color = as.factor(u$size[rep1_zero_mask])), groupOnX = F, width = 0.2, alpha = 1) +
geom_quasirandom(aes(y = log10(1 + u$rep1_count[rep2_zero_mask]), x = rep(-0.2, sum(rep2_zero_mask)), color = as.factor(u$size[rep2_zero_mask])), groupOnX = T, width = 0.2, alpha = 1)
}
}
if (plot=="complexity") {
g <- ggplot() +
geom_point(aes(y = log10(1 + u$rep1_count[nonzero_mask]) , x = log10(1 + u$rep2_count[nonzero_mask]),
color = as.factor(u$dist_orig[nonzero_mask])), alpha = 1) +
geom_abline(alpha=0.75) + ylab(paste('log10(reads in ', name1, ')')) + xlab(paste('log10(reads in ', name2, ')')) +
ggtitle(paste(name1, 'vs', name2)) + scale_color_discrete(name="Complexity")
if (name1!=name2){
g <- g +
geom_quasirandom(aes(y = rep(-0.2, sum(rep1_zero_mask)), x = log10(1 + u$rep2_count[rep1_zero_mask]), color = as.factor(u$size[rep1_zero_mask])), groupOnX = F, width = 0.2, alpha = 1) +
geom_quasirandom(aes(y = log10(1 + u$rep1_count[rep2_zero_mask]), x = rep(-0.2, sum(rep2_zero_mask)), color = as.factor(u$size[rep2_zero_mask])), groupOnX = T, width = 0.2, alpha = 1)
}
}
return(g)
})
#' @export
barcode_union <- function(rep1, rep2, type="size", range){
if (type=="size") {
x = unique(c(filter(rep1@decode@data, size >= range[1], size <= range[2])$code,
filter(rep2@decode@data, size >= range[1], size <= range[2])$code))
} else if (type=="complexity") {
x = unique(c(filter(rep1@decode@data, dist_orig >= range[1], dist_orig <= range[2])$code,
filter(rep2@decode@data, dist_orig >= range[1], dist_orig <= range[2])$code))
}
return (x)
}
#' @export
get_barcode_stats_rep <- function(union_bc, rep){
index <- match(union_bc, rep@decode@data$code)
# u <- loxcoder::valid(rep)[index, ]
u <- rep@decode@data[index, ]
u$count[is.na(u$count)] <- 0
u$code <- union_bc
return(u)
}
#' @export
get_comparison_table <- function(rep1, rep2, type="size", range=c(1,13)){
bc_union <- barcode_union(rep1, rep2, type, range)
u1 <- get_barcode_stats_rep(bc_union, rep1)
u2 <- get_barcode_stats_rep(bc_union, rep2)
u1$size <- ifelse(is.na(u1$size), u2$size, u1$size)
u2$size <- u1$size
u1$dist_orig <- ifelse(is.na(u1$dist_orig), u2$dist_orig, u1$dist_orig)
u2$dist_orig <- u1$dist_orig
# scale by total number of reads
u1$count <- u1$count
u2$count <- u2$count*(sum(loxcoder::data(rep1)$count)/sum(loxcoder::data(rep2)$count))
u <- data.frame(code = bc_union,
size = u1$size,
dist_orig = u1$dist_orig,
rep1_count = u1$count,
rep2_count = u2$count)
nonzero_mask <- (u$rep1_count > 0 & u$rep2_count > 0)
rep1_zero_mask <- (u$rep1_count == 0)
rep2_zero_mask <- (u$rep2_count == 0)
u$log_rep1[nonzero_mask] = u$rep1_count[nonzero_mask]
u$log_rep2[nonzero_mask] = u$rep2_count[nonzero_mask]
return(u)
}
#' Pair Comparison Plot 2
#'
#' @param lox Loxcode_experiment object
#' @param sampleset Sample set
#' @param codeset Code set
#' @param s1 First Loxcode_sample
#' @param s2 Second Loxcode_sample
#' @param colorBy Color points by size/dist_orig/firstread
#' @param labels Sample name or alias as labels
#'
#' @export
setGeneric("pair_comparison_plot2", function(lox, sampleset="all_samples", codeset="all_codes", s1, s2, colorBy="size", labels="alias", sizeRange=NULL, dist_origRange=NULL, firstreadRange=NULL) {standardGeneric("pair_comparison_plot2")})
setMethod("pair_comparison_plot2", "loxcode_experiment", function(lox, sampleset="all_samples", codeset="all_codes", s1, s2, colorBy="size", labels="alias", sizeRange=NULL, dist_origRange=NULL, firstreadRange=NULL) {
# Labels
name1 = switch(labels, "alias" = get_alias(lox, sampleset, s1@name), "sample" = s1@name)
name2 = switch(labels, "alias" = get_alias(lox, sampleset, s2@name), "sample" = s2@name)
# Comparison Table
comp_table = get_comparison_table2(s1, s2)
# Subset Comparison Table
comp_table <- comp_table[comp_table$code %in% lox@code_sets[[codeset]]$code, ]
if (!is.null(sizeRange)) {
comp_table <- subset(comp_table, size >= sizeRange[1] & size <= sizeRange[2])
}
if (!is.null(dist_origRange)) {
comp_table <- subset(comp_table, dist_orig >= dist_origRange[1] & dist_orig <= dist_origRange[2])
}
if (!is.null(firstreadRange)) {
comp_table <- subset(comp_table, firstread >= firstreadRange[1] & firstread <= firstreadRange[2])
}
nonzero_mask <- (comp_table$s1_count > 0 & comp_table$s2_count > 0)
s1_zero_mask <- (comp_table$s1_count == 0)
s2_zero_mask <- (comp_table$s2_count == 0)
theme_update(plot.title = element_text(hjust = 0.5))
color = switch(colorBy,
"size" = as.factor(comp_table$size[nonzero_mask]),
"dist_orig" = as.factor(comp_table$dist_orig[nonzero_mask]),
"firstread" = log10(comp_table$firstread[nonzero_mask]))
# Main plot
g = ggplot() +
geom_point(aes(y = comp_table$log_s1[nonzero_mask] , x = comp_table$log_s2[nonzero_mask], color = color)) +
geom_abline(alpha = 0.75) +
ylab(paste0('log10(1 + reads in ', name1, ')')) +
xlab(paste0('log10(1 + reads in ', name2, ')')) +
ggtitle(paste(name1, 'vs', name2))
if (colorBy == "firstread") {
g = g + scale_color_continuous(name = colorBy)
} else {
g = g + scale_color_discrete(name = colorBy)
}
# Barcodes only in one sample
if (name1 != name2) {
color1 = switch(colorBy,
"size" = as.factor(comp_table$size[s1_zero_mask]),
"dist_orig" = as.factor(comp_table$dist_orig[s1_zero_mask]),
"firstread" = log10(comp_table$firstread[s1_zero_mask]))
color2 = switch(colorBy,
"size" = as.factor(comp_table$size[s2_zero_mask]),
"dist_orig" = as.factor(comp_table$dist_orig[s2_zero_mask]),
"firstread" = log10(comp_table$firstread[s2_zero_mask]))
g = g +
geom_quasirandom(aes(y = rep(-0.2, sum(s1_zero_mask)), x = log10(1 + comp_table$s2_count[s1_zero_mask]), color = color1), groupOnX = FALSE, width = 0.2) +
geom_quasirandom(aes(y = log10(1 + comp_table$s1_count[s2_zero_mask]), x = rep(-0.2, sum(s2_zero_mask)), color = color2), groupOnX = TRUE, width = 0.2)
}
return(g)
})
#' Comparison table (v3)
#'
#' Gets the comparison table used to produce the pair_comparison plot
#' @param lox Loxcode_experiment object
#' @param sampleset sample set
#' @param codeset code set
#' @param s1 name of sample 1
#' @param s2 name of sample 2
#' @param colorBy Color points by size/dist_orig/firstread
#' @param labels Sample name or alias as labels
#' @param sizeRange restrict the range of code sizes displayed
#' @param dist_origRange restrict the range of code complexities displayed
#' @param firstreadRange restrict the range of firstreads displayed
#' @return pair comparison plot
#'
#' @export
setGeneric("pair_comparison_plot3", function(lox, sampleset="all_samples", codeset="all_codes", s1, s2, colorBy="size", labels="alias", sizeRange=NULL, dist_origRange=NULL, firstreadRange=NULL) {standardGeneric("pair_comparison_plot3")} )
setMethod("pair_comparison_plot3", "loxcode_experiment", function(lox, sampleset="all_samples", codeset="all_codes", s1, s2, colorBy="size", labels="alias", sizeRange=NULL, dist_origRange=NULL, firstreadRange=NULL) {
# Labels
name1 = switch(labels, "alias" = get_alias(lox, sampleset, s1@name), "sample" = s1@name)
name2 = switch(labels, "alias" = get_alias(lox, sampleset, s2@name), "sample" = s2@name)
# Comparison Table
comp_table = get_comparison_table3(lox, sampleset, )
# Subset Comparison Table
comp_table <- comp_table[comp_table$code %in% lox@code_sets[[codeset]]$code, ]
if (!is.null(sizeRange)) {
comp_table <- subset(comp_table, size >= sizeRange[1] & size <= sizeRange[2])
}
if (!is.null(dist_origRange)) {
comp_table <- subset(comp_table, dist_orig >= dist_origRange[1] & dist_orig <= dist_origRange[2])
}
if (!is.null(firstreadRange)) {
comp_table <- subset(comp_table, firstread >= firstreadRange[1] & firstread <= firstreadRange[2])
}
nonzero_mask <- (comp_table$s1_count > 0 & comp_table$s2_count > 0)
s1_zero_mask <- (comp_table$s1_count == 0)
s2_zero_mask <- (comp_table$s2_count == 0)
theme_update(plot.title = element_text(hjust = 0.5))
color = switch(colorBy,
"size" = as.factor(comp_table$size[nonzero_mask]),
"dist_orig" = as.factor(comp_table$dist_orig[nonzero_mask]),
"firstread" = log10(comp_table$firstread[nonzero_mask]))
# Main plot
g = ggplot() +
geom_point(aes(y = comp_table$log_s1[nonzero_mask] , x = comp_table$log_s2[nonzero_mask], color = color)) +
geom_abline(alpha = 0.75) +
ylab(paste0('log10(1 + reads in ', name1, ')')) +
xlab(paste0('log10(1 + reads in ', name2, ')')) +
ggtitle(paste(name1, 'vs', name2))
if (colorBy == "firstread") {
g = g + scale_color_continuous(name = colorBy)
} else {
g = g + scale_color_discrete(name = colorBy)
}
# Barcodes only in one sample
if (name1 != name2) {
color1 = switch(colorBy,
"size" = as.factor(comp_table$size[s1_zero_mask]),
"dist_orig" = as.factor(comp_table$dist_orig[s1_zero_mask]),
"firstread" = log10(comp_table$firstread[s1_zero_mask]))
color2 = switch(colorBy,
"size" = as.factor(comp_table$size[s2_zero_mask]),
"dist_orig" = as.factor(comp_table$dist_orig[s2_zero_mask]),
"firstread" = log10(comp_table$firstread[s2_zero_mask]))
g = g +
geom_quasirandom(aes(y = rep(-0.2, sum(s1_zero_mask)), x = log10(1 + comp_table$s2_count[s1_zero_mask]), color = color1), groupOnX = FALSE, width = 0.2) +
geom_quasirandom(aes(y = log10(1 + comp_table$s1_count[s2_zero_mask]), x = rep(-0.2, sum(s2_zero_mask)), color = color2), groupOnX = TRUE, width = 0.2)
}
return(g)
})
#' @export
get_comparison_table2 <- function(s1, s2) {
# table of barcodes in both samples
counts = merge(s1@decode@data, s2@decode@data, by="code", all=TRUE)
counts$dist_orig = rowMeans(counts[ , grepl("dist_orig", names(counts))], na.rm = TRUE)
counts$size = rowMeans(counts[ , grepl("size", names(counts))], na.rm = TRUE)
counts$firstread = rowMeans(counts[ , grepl("firstread", names(counts))], na.rm = TRUE)
counts$count.x[is.na(counts$count.x)] <- 0
counts$count.y[is.na(counts$count.y)] <- 0
table = data.frame(code = counts$code,
size = counts$size,
dist_orig = counts$dist_orig,
firstread = counts$firstread,
s1_count = counts$count.x,
s2_count = counts$count.y,
stringsAsFactors = FALSE)
# scale by total number of reads
table$s2_count <- table$s2_count * (sum(loxcoder::data(s1)$count) / sum(loxcoder::data(s2)$count))
nonzero_mask <- (table$s1_count > 0 & table$s2_count > 0)
s1_zero_mask <- (table$s1_count == 0)
s2_zero_mask <- (table$s2_count == 0)
# log of counts
table$log_s1[nonzero_mask] = log10(1 + table$s1_count[nonzero_mask])
table$log_s2[nonzero_mask] = log10(1 + table$s2_count[nonzero_mask])
return(table)
}
#' Show recombination distance distribution by size as beeswarm plot
#'
#' Produces a beeswarm plot where for each cassette size, individual distinct barcodes are shown as
#' points. Size and color of points correspond to the read count of each barcode.
#' @param x loxcode_sample object
#' @param count_threshold counts threshold, barcodes with a count number exceeding this threshold are ignored. This is in order
#' to avoid having very large points resulting from barcodes with disproportionate read counts.
#' @export
setGeneric("dist_count_beeswarm_plot", function(x, count_threshold) {standardGeneric("dist_count_beeswarm_plot")})
setMethod("dist_count_beeswarm_plot", "loxcode_sample", function(x, count_threshold){
loxcoder::valid(x) %>% filter(count < count_threshold) -> y
g <- ggplot(data = y) + geom_quasirandom(width = 0.9, groupOnX = F, aes(x = dist_orig, y = size, size = count, color = count), alpha = 0.2) +
scale_size_area("num_reads") +
scale_x_continuous("distance from origin", breaks = 0:15) +
scale_y_continuous("size", breaks = c(3, 5, 7, 9, 13)) +
scale_color_gradient(low = 'blue', high = 'red') +
ggtitle(loxcoder::name(x))
return(g)
})
#' Generate heatmap from count_matrix (TW/31/10/2019)
#' @param loxcode_experiment loxcode_experiment object
#' @export
setGeneric("heatmap_plot", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",style="ggplot", labels="sample", clustering="none",agglomeration="complete",min_reads=0,max_repeats=100,min_repeats=1) {standardGeneric("heatmap_plot")})
setMethod("heatmap_plot", "loxcode_experiment", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",style="ggplot", labels="sample", clustering="none",agglomeration="complete",min_reads=0,max_repeats=100,min_repeats=1){
x = loxcode_experiment
#clustering
cluster_row = switch(clustering, "none"=FALSE, "row"=TRUE, "col"=FALSE, "both"=TRUE)
cluster_col = switch(clustering, "none"=FALSE, "row"=FALSE, "col"=TRUE, "both"=TRUE)
##select only codes that are present in code_set
m=x@count_matrixes[[count_matrix]]
m=m[row.names(m)%in%x@code_sets[[code_set]]$code,]
m[m<min_reads]=0
m=m[rowSums(m)>0,]
m=m[rowSums(m>0)<max_repeats,]
m=m[rowSums(m>0)>=min_repeats,]
m=m[,colSums(m)>0]
#m=sweep(m,2,colSums(m),`/`)
if(cluster_row==TRUE) {row.order <- hclust(dist(m),method=agglomeration)$order; m <- m[row.order, ] }
if(cluster_col==TRUE) {col.order <- hclust(dist(t(m)),method=agglomeration)$order; m <- m[, col.order]}
if(cluster_row==FALSE & cluster_col==FALSE & agglomeration =="binary")
{
binary=array()
for(i in 1:nrow(m)) binary[i]=paste0(as.numeric((m>0)[i,]),collapse = "")
m=m[order(binary,rowSums(m)),]
}
m$pos=1:nrow(m)
m$code=row.names(m)
mm=reshape::melt(m,id.vars=c("pos","code"))
m=subset(m, select=-c(pos,code));
mm[mm$value==0,]$value=NA
labs <- switch(labels, "sample"=names(m), "alias"=lapply(names(m), get_alias, lox=x, set=count_matrix))
ggplot(mm)+
geom_tile(aes(pos,as.numeric(variable),fill=log10(value+1),group=code))+
scale_fill_gradient2(midpoint = 0,high = "darkred",mid = "lightgray",name="log10(counts+1)")+
theme_minimal()+
xlab("")+
ylab("samples")+xlab("loxcodes")+
scale_x_continuous(expand = c(0, 0))+
scale_y_continuous(breaks=1:length(labs),labels=labs,sec.axis = dup_axis(labels=colSums(m>0),name="# codes"))+
theme(legend.position="top")
})
#' Generate bubble from count_matrix (TW/07/02/2019)
#' @param loxcode_experiment loxcode_experiment object
#' @export
setGeneric("bubble_plot", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",style="ggplot", labels="sample", clustering="none",agglomeration="complete",min_reads=0,max_repeats=100,min_repeats=1) {standardGeneric("bubble_plot")})
setMethod("bubble_plot", "loxcode_experiment", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",style="ggplot", labels="sample", clustering="none",agglomeration="complete",min_reads=0,max_repeats=100,min_repeats=1){
x = loxcode_experiment
#clustering
cluster_row = switch(clustering, "none"=FALSE, "row"=TRUE, "col"=FALSE, "both"=TRUE)
cluster_col = switch(clustering, "none"=FALSE, "row"=FALSE, "col"=TRUE, "both"=TRUE)
##select only codes that are present in code_set
m=x@count_matrixes[[count_matrix]]
m=m[row.names(m)%in%x@code_sets[[code_set]]$code,]
m[m<min_reads]=0
m=m[rowSums(m)>0,]
m=m[rowSums(m>0)<max_repeats,]
m=m[rowSums(m>0)>=min_repeats,]
m=m[,colSums(m)>0]
m=sweep(m,2,colSums(m),`/`)
if(cluster_row==TRUE) {row.order <- hclust(dist(m),method=agglomeration)$order; m <- m[row.order, ] }
if(cluster_col==TRUE) {col.order <- hclust(dist(t(m)),method=agglomeration)$order; m <- m[, col.order]}
if(cluster_row==FALSE & cluster_col==FALSE & agglomeration =="binary")
{
binary=array()
for(i in 1:nrow(m)) binary[i]=paste0(as.numeric((m>0)[i,]),collapse = "")
m=m[order(binary,rowSums(m)),]
print(table(binary))
}
m$index=1:nrow(m)
m$code=row.names(m)
mm=reshape::melt(m,id.var=c("index","code"))
source("~/R/functions/randomcolor.R")
labs <- switch(labels, "sample"=names(m), "alias"=lapply(names(m), get_alias, lox=x, set=count_matrix))
ggplot()+
geom_hline(aes(yintercept = as.vector(1:10 %o% 10^(0:-7))),color="lightgray", size=0.1,alpha=0.3)+
scale_color_manual(values = unname(distinctColorPalette(nrow(m))),breaks=rownames(m),guide = FALSE)+
geom_point(aes(factor(variable),index,color=factor(index),size=log(value)),data=mm,alpha=0.8)+
theme(legend.position="none")+
scale_radius(range = c(1,10))+
theme_minimal()+
theme(plot.title = element_text(size = 7),legend.position="none")+
xlab("samples")+ylab("")+
scale_x_discrete(labels=labs)+
theme(
panel.grid.major.x = element_blank() ,
panel.grid.minor.y = element_blank() ,
axis.text=element_text(size=10)
#axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)
)+coord_flip()
})
#' Read stats (TW/01/11/2019)
#' @param x loxcode_experiment object
#' @param count_matrix (not implemented yet)
#' @param code_set (not implemented yet)
#' @param plot specify which plot (size/complexity/both/ratio)
#' @param fill specify whether reads should be normalized in boxplots
#' @export
setGeneric("readstats_plot_old", function(x,count_matrix="all_samples",code_set="all_codes",plot="size",fill=TRUE, labels="sample") {standardGeneric("readstats_plot_old")})
setMethod("readstats_plot_old", "loxcode_experiment", function(x,count_matrix="all_samples",code_set="all_codes",plot="size", fill=TRUE, labels="sample"){
pos="stack"; if(fill==TRUE) pos="fill"
P=ggplot()
if(plot == "size"){
#iterating over samples and sizes and count # reads per condition
d1=data.frame();
for(i in x@samples){
if (i@name %in% names(x@count_matrixes[[count_matrix]])){
dd=i@decode@data;
for(j in c(1:13)){
ddd=data.frame(sample=name(i), size=j, count=sum(subset(dd,size==j)$count))
d1=plyr::rbind.fill(d1,ddd);
print(ddd)
}
}
}
d1=subset(d1,count>0)
P=(ggplot(d1)+geom_bar(aes(fill=factor(size), y=count, x=sample),position=pos, stat="identity")+
scale_fill_discrete(name="size")+
theme_minimal())
}
if(plot == "complexity"){
#iterating over samples and complexity and count # reads per condition
d2=data.frame();
for(i in x@samples){
if (i@name %in% names(x@count_matrixes[[count_matrix]])) {
dd=i@decode@data;
for(j in c(0:15)){
ddd=data.frame(sample=name(i), dist_orig=j, count=sum(subset(dd,dist_orig==j)$count))
d2=plyr::rbind.fill(d2,ddd);
}
}
}
d2=subset(d2,count>0)
P=(ggplot(d2)+geom_bar(aes(fill=factor(dist_orig), y=count, x=sample),position=pos, stat="identity")+
scale_fill_discrete(name="complexity")+
theme_minimal())
}
## plotting reads for both complexity and size
if(plot == "both"){
#iterating over sizes and complexities
d3=data.frame();
for(i in x@samples){
#if (i@name %in% names(x@count_matrixes[[count_matrix]])) {
dd=i@decode@data;
for(j in c(0:15))for(k in c(1:13)){
ddd=data.frame(sample=name(i), dist_orig=j, size=k, reads=sum(subset(dd,dist_orig==j & size==k)$count))
d3=plyr::rbind.fill(d3,ddd);
}
#}
}
d3=subset(d3,reads>0)
P=(ggplot(d3)+facet_wrap(~sample)+geom_point(aes(factor(size),factor(dist_orig), size=reads, color=(reads)))+
scale_fill_discrete(name="complexity")+
theme_minimal()+xlab("size")+ylab("complexity")+
scale_size_area()+scale_color_gradient(low = "blue", high = "red"))
}
## plotting ratio barcodes/reads
if(plot == "ratio"){
#iterating over sizes and complexities
d4=data.frame();
for(i in x@samples){
if (i@name %in% names(x@count_matrixes[[count_matrix]])) {
dd=i@decode@data;
for(j in c(0:15))for(k in c(1:13)){
ddd=data.frame(sample=name(i), dist_orig=j, size=k, ratio=nrow(subset(dd,dist_orig==j & size==k))/sum(subset(dd,dist_orig==j & size==k)$count))
d4=plyr::rbind.fill(d4,ddd);
}
}
}
d4=subset(d4,ratio>0)
P=(ggplot(d4)+facet_wrap(~sample)+geom_point(aes(factor(size),factor(dist_orig), size=ratio, color=(ratio)))+
scale_fill_discrete(name="complexity")+
theme_minimal()+xlab("size")+ylab("complexity")+
scale_size_area()+scale_color_gradient(low = "blue", high = "red"))
}
P
})
# ggplot()+geom_histogram(aes(NN167@samples[["N701-N505"]]@decode@data$size), stat="count")
#' readstats plot
#'
#' @param loxcode_experiment loxcode_experiment object
#' @param count_matrix
#' @param code_set
#' @param plot specify which plot (size/complexity/both/ratio)
#' @param fill specify whether reads should be normalized in boxplots
#' @export
setGeneric("readstats_plot", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",plot="size",fill=TRUE, labels="alias") {standardGeneric("readstats_plot")})
setMethod("readstats_plot", "loxcode_experiment", function(loxcode_experiment,count_matrix="all_samples",code_set="all_codes",plot="size", fill=TRUE, labels="sample"){
x = loxcode_experiment
# graph config
pos = "stack"
if (fill==TRUE) { pos = "fill" }
# initialize
p = ggplot()
counts = x@count_matrixes[[count_matrix]]
codes = x@code_sets[[code_set]]
samples = names(counts)
### SIZE PLOT
if (plot == "size"){
sizeBySample = data.frame()
# create size count table
for (i in 1:length(samples)) {
for (j in c(1:13)){
row = data.frame(sample=samples[[i]], alias=get_alias(x, count_matrix, samples[[i]]), size=j, count=0)
sizeBySample = plyr::rbind.fill(sizeBySample, row)
}
}
# fill size count table
for (i in 1:nrow(counts)) {
row = counts[i, ]
code = rownames(row)
if (code %in% codes$code) {
size = length(gregexpr(" ", code)[[1]]) + 1 # one more than number of spaces
for (j in names(row)) {
curr = row[[j]]
index = which(sizeBySample$sample==j & sizeBySample$size==size)
prev = sizeBySample$count[[index]]
sizeBySample$count[[index]] = curr + prev
}
}
}
sizeBySample = subset(sizeBySample, count>0)
p = ggplot(sizeBySample) +
geom_bar(aes(fill=factor(size), y=count, x=switch(labels, "alias"=alias, "sample"=sample)), position=pos, stat="identity") +
scale_fill_discrete(name="size") +
labs(x="sample")
}
### COMPLEXITY PLOT
if (plot=="complexity") {
# create complexity count table
complexityBySample = data.frame()
for (i in 1:length(samples)) {
for (j in c(0:15)){
row = data.frame(sample=samples[[i]], alias=get_alias(x, count_matrix, samples[[i]]), dist_orig=j, count=0)
complexityBySample = plyr::rbind.fill(complexityBySample, row)
}
}
# fill complexity count table
for (i in 1:nrow(counts)) {
row = counts[i, ]
code = rownames(row)
if (code %in% codes$code) {
dist = codes$dist_orig[[match(code, codes$code)]]
if (is.na(dist)) { dist = 0 }
for (j in names(row)) {
curr = row[[j]]
index = which(complexityBySample$sample==j & complexityBySample$dist_orig==dist)
prev = complexityBySample$count[[index]]
complexityBySample$count[[index]] = curr + prev
}
}
}
complexityBySample = subset(complexityBySample, count>0)
p = ggplot(complexityBySample) +
geom_bar(aes(fill=factor(dist_orig), y=count, x=switch(labels, "alias"=alias, "sample"=sample)), position=pos, stat="identity") +
scale_fill_discrete(name="complexity") +
labs(x="sample")
}
### SIZE COMPLEXITY PLOT
if (plot=="both") {
# create counts table
bothBySample = data.frame()
for (i in 1:length(samples)) {
for (j in 0:15) for (k in 1:13) {
row = data.frame(sample=samples[[i]], alias=get_alias(x, count_matrix, samples[[i]]), dist_orig=j, size=k, count=0)
bothBySample = plyr::rbind.fill(bothBySample, row)
}
}
# fill counts table
for (i in 1:nrow(counts)) {
row = counts[i, ]
code = rownames(row)
if (code %in% codes$code) {
size = length(gregexpr(" ", code)[[1]]) + 1 # one more than number of spaces
dist = codes$dist_orig[[match(code, codes$code)]]
if (is.na(dist)) { dist = 0 }
for (j in names(row)) {
curr = row[[j]]
index = which(bothBySample$sample==j & bothBySample$dist_orig==dist & bothBySample$size==size)
prev = bothBySample$count[[index]]
bothBySample$count[[index]] = curr + prev
}
}
}
bothBySample = subset(bothBySample, count>0)
p = (ggplot(bothBySample) + facet_wrap(~switch(labels, "alias"=alias, "sample"=sample)) + geom_point(aes(factor(size), factor(dist_orig), size=count, color=count)) +
scale_fill_discrete(name="complexity") +
theme_minimal() + xlab("size") + ylab("complexity") +
scale_size_area() + scale_color_gradient(low="blue", high="red"))
}
### RATIO PLOT
if (plot=="ratio") {
# create ratio table
ratioBySample = data.frame()
for (i in 1:length(samples)) {
for (j in 0:15) for (k in 1:13) {
row = data.frame(sample=samples[[i]], alias=get_alias(x, count_matrix, samples[[i]]), dist_orig=j, size=k, ratio=0, ncodes=0, count=0)
ratioBySample = plyr::rbind.fill(ratioBySample, row)
}
}
# fill ratio table
for (i in 1:nrow(counts)) {
row = counts[i, ]
code = rownames(row)
if (code %in% codes$code) {
size = length(gregexpr(" ", code)[[1]]) + 1 # one more than number of spaces
dist = codes$dist_orig[[match(code, codes$code)]]
if (is.na(dist)) { dist = 0 }
for (j in names(row)) {
curr = row[[j]]
index = which(ratioBySample$sample==j & ratioBySample$dist_orig==dist & ratioBySample$size==size)
prev = ratioBySample$count[[index]]
ratioBySample$count[[index]] = curr + prev
ratioBySample$ncodes[[index]] = ratioBySample$ncodes[[index]] + 1
ratioBySample$ratio[[index]] = ratioBySample$ncodes[[index]] / ratioBySample$count[[index]]
}
}
}
ratioBySample$count <- NULL
ratioBySample$ncodes <- NULL
ratioBySample = subset(ratioBySample, ratio>0)
ratioBySample = subset(ratioBySample, ratio!=Inf)
p = (ggplot(ratioBySample) + facet_wrap(~switch(labels, "alias"=alias, "sample"=sample)) + geom_point(aes(factor(size),factor(dist_orig), size=ratio, color=(ratio)))+
scale_fill_discrete(name="complexity")+
theme_minimal()+xlab("size")+ylab("complexity")+
scale_size_area()+scale_color_gradient(low = "blue", high = "red"))
}
return (p)
})
#' sample comparison pie
#' @param x loxcode_experiment object
#' @param count_matrix
#' @param code_set
#' @param scale scale the size of pies
#' @param labels by sample or alias
#' @export
setGeneric("sample_comparison_pie", function(x,count_matrix="all_samples",code_set="all_codes", scale=1, labels="sample") {standardGeneric("sample_comparison_pie")})
setMethod("sample_comparison_pie", "loxcode_experiment", function(x,count_matrix="all_samples",code_set="all_codes", scale=1, labels="sample"){
M=x@count_matrixes[[count_matrix]]
M=M[row.names(M)%in%x@code_sets[[code_set]]$code,]
m=data.frame();
for(i in 1:ncol(M))for(j in 1:ncol(M)){
both=sum(M[,i]>0 & M[,j]>0,na.rm=TRUE)
in.1=sum(M[,i]>0 & M[,j]==0,na.rm=TRUE)
in.2=sum(M[,j]>0 & M[,i]==0,na.rm=TRUE)
d=data.frame(sample1=i, sample2=j,both=both,in.1=in.1,in.2=in.2,stringsAsFactors = FALSE)
m=plyr::rbind.fill(m,d)
}
library('scatterpie')
sample_names = names(x@count_matrixes[[count_matrix]])
labels = switch(labels, "sample"=sample_names, "alias"=lapply(sample_names, get_alias, lox=x, set=count_matrix))
ggplot(m)+
scatterpie::geom_scatterpie(aes(x=sample1,y=sample2,r=(both+in.1+in.2)/(1200/scale)),data=m, cols=c(3:5),size=0.1)+
scale_x_continuous(breaks=unique(m$sample1),labels=labels)+
scale_y_continuous(breaks=unique(m$sample2),labels=labels)+
theme_bw()+theme(axis.text.x=element_text(angle=45, hjust=1))+
scale_fill_discrete(name="")
})
#' saturation plot (TW/05/11/2019)
#' @param loxcode_experiment loxcode_experiment object
#' @param loxcode_sample which sample to use
#' @param code_set which code set to use
#' @export
setGeneric("saturation_plot", function(loxcode_experiment, loxcode_sample, code_set="all_codes") {standardGeneric("saturation_plot")})
setMethod("saturation_plot", "loxcode_experiment", function(loxcode_experiment, loxcode_sample, code_set="all_codes"){
x = loxcode_experiment
i = x@samples[[loxcode_sample]]
cs=i@decode@data;
cs=subset(cs,code%in%x@code_sets[[code_set]]$code)
cs=cs[order(cs$firstread),]
cs$index=c(1:nrow(cs))
new = data.frame(firstread = cs$firstread, index = cs$index, dist_orig=cs$dist_orig)
g <- ggplot(new)+geom_step(aes(firstread,index,color=factor(dist_orig)))+geom_point(aes(firstread,index,color=factor(dist_orig)))
# g <- ggplot(new) + geom_line(aes(firstread1,index1,color=dist_orig1)) + geom_line(aes(firstread2,index2,color=dist_orig2))+scale_colour_manual()
return (g)
})
#' Multiple lined saturation plot
#'
#' @param loxcode_experiment loxcode_experiment object
#' @param loxcode_samples list of loxcode samples
#' @param code_sets list of code_sets to use
#' @param colorby colour by dist_orig or sample
#' @export
setGeneric("saturation_multi", function(loxcode_experiment, loxcode_samples, codesets, colorby="dist_orig") {standardGeneric("saturation_multi")})
setMethod("saturation_multi", "loxcode_experiment", function(loxcode_experiment, loxcode_samples, codesets, colorby="dist_orig") {
x = loxcode_experiment
data = data.frame()
# create the data.frame
for (i in 1:length(loxcode_samples)) {
s = loxcode_samples[[i]]
c = codesets[[i]]
sample = x@samples[[s]]
counts = sample@decode@data
counts$dist_orig[is.na(counts$dist_orig)] = 0
counts = subset(counts, counts$code %in% x@code_sets[[codesets[[i]]]]$code)
counts = counts[order(counts$firstread), ]
counts$index = c(1:nrow(counts))
one_sample_data = data.frame(firstread = counts$firstread,
index = counts$index,
dist_orig = counts$dist_orig,
sample = s)
colour = switch(colorby, "dist_orig" = one_sample_data$dist_orig, "sample" = one_sample_data$sample)
if (i==1) {
g <- ggplot(one_sample_data, aes(firstread, index)) +
geom_line(data=one_sample_data) +
geom_point(data=one_sample_data, aes(firstread, index, colour = switch(colorby, "dist_orig" = factor(dist_orig), "sample" = sample)))+
scale_color_discrete(name="")
}
else {
g <- g +
geom_line(data=one_sample_data) +
geom_point(data=one_sample_data, aes(firstread, index, colour = switch(colorby, "dist_orig" = factor(dist_orig), "sample" = sample)))
}
}
return(g)
})
#' 2-way Venn diagram
#'
#' Plot 2-way Venn diagram showing common and distinct barcodes between two given samples in an experiment using VennDiagram package
#' @param x loxcode_experiment object
#' @param a sample A name
#' @param b sample B name
#' @param size_range range of loxcode sizes to show in Venn diagram. Both lower and upper bounds are inclusive
#' @param dist_range range of dist_orig values to show in Venn diagram. Both lower and upper bounds are inclusive
#' @param labels optional list of custom labels for samples A, B, otherwise sample names for A, B are used.
#' @return grobTree containing the 2-way Venn diagram graphic
#' @export
setGeneric("venn_2way", function(x, a, b, size_range, dist_range, ...){standardGeneric("venn_2way")})
setMethod("venn_2way", "loxcode_experiment", function(x, a, b, size_range, dist_range, labels = NA){
samp_names <- c(a, b)
samp <- as.list(samp_names)
if(!is.na(labels)){
samp_names <- labels
}
samp <- lapply(samp, function(s){
return(loxcoder::get_valid(x, s) %>%
filter(size >= size_range[1] &
size <= size_range[2] &
dist_orig >= dist_range[1] &
dist_orig <= dist_range[2]))
})
v <- draw.pairwise.venn(area1 = nrow(samp[[1]]),
area2 = nrow(samp[[2]]),
cross.area = length(intersect(samp[[1]]$code, samp[[2]]$code)),
category = samp_names,
fontfamily = rep('sans', 3),
cat.fontfamily = rep('sans', 2),
fill = c('red', 'blue'))
return(grobTree(children = v))
})
#' 3-way Venn diagram
#'
#' Plot 3-way Venn diagram showing common and distinct barcodes between two given samples in an experiment using VennDiagram package
#' @param x loxcode_experiment object
#' @param a sample A name
#' @param b sample B name
#' @param c sample C name
#' @param size_range range of loxcode sizes to show in Venn diagram. Both lower and upper bounds are inclusive
#' @param dist_range range of dist_orig values to show in Venn diagram. Both lower and upper bounds are inclusive
#' @param labels optional list of custom labels for samples A, B, C, otherwise sample names for A, B, C are used.
#' @return grobTree containing the 3-way Venn diagram graphic
#' @export
#' @export
setGeneric("venn_3way", function(x, a, b, c, size_range, dist_range, ...){standardGeneric("venn_3way")})
setMethod("venn_3way", "loxcode_experiment", function(x, a, b, c, size_range, dist_range, labels = NA){
samp_names <- c(a, b, c)
samp <- as.list(samp_names)
names(samp) <- samp_names
if(!is.na(labels)){
samp_names <- labels
}
samp <- lapply(samp, function(s){ return(loxcoder::get_valid(x, s) %>%
filter(size >= size_range[1] &
size <= size_range[2] &
dist_orig >= dist_range[1] &
dist_orig <= dist_range[2]))} )
pairs <- list("n12" = c(1, 2), "n23" = c(2, 3), "n13" = c(1, 3));
n_pairs <- lapply(pairs, function(x){
length(intersect(samp[[x[1]]]$code, samp[[x[2]]]$code))
})
n_all <- length(intersect(samp[[1]]$code, intersect(samp[[2]]$code, samp[[3]]$code)))
v <- draw.triple.venn(area1 = nrow(samp[[1]]),
area2 = nrow(samp[[2]]),
area3 = nrow(samp[[3]]),
n12 = n_pairs$n12,
n23 = n_pairs$n23,
n13 = n_pairs$n13,
n123 = n_all,
category = samp_names,
fontfamily = rep('sans', 7),
cat.fontfamily = rep('sans', 3),
fill = c('red', 'blue', 'green'))
return(grobTree(children = v))
})
#' sample table
#'
#' Tabulates the barcode counts, total number of reads, maximum complexity and consensus filtered for each sample in an experiment
#' @param x loxcode_experiment object
#' @param c count_matrix of samples to display
#' @export
setGeneric("sample_table", function(x, c) {standardGeneric("sample_table")})
setMethod("sample_table", "loxcode_experiment", function(x, c){
d=data.frame()
for (i in 1:length(x@samples)) {
curr_sample = x@samples[[i]]
if (name(curr_sample) %in% names(x@count_matrixes[[c]])){
temp=data.frame(
"Sample_Name"= name(curr_sample),
curr_sample@meta,
stringsAsFactors = FALSE)
d=plyr::rbind.fill(d,temp)
}
}
return(d)
})
strCap <- function(y) {
c <- strsplit(y, " ")[[1]]
paste(toupper(substring(c, 1,1)), substring(c, 2),
sep="", collapse=" ")
}
capitalize <- function(w) {
cap = c()
for (word in w) {
cap = append(cap, strCap(word))
}
return (cap)
}
#' summary table
#'
#' Tabulates the barcode counts, total number of reads, maximum complexity and consensus filtered for each sample in an experiment
#' @param x loxcode_experiment object
#' @param c count_matrix of samples to display
#' @export
setGeneric("summary_table2", function(x, c="all_samples") {standardGeneric("summary_table2")})
setMethod("summary_table2", "loxcode_experiment", function(x, c="all_samples"){
d=data.frame()
if (!length(x@count_matrixes[[c]])) { return() }
# check if samples have been collapsed
collapsed = TRUE
if (length(intersect(names(x@samples), names(x@count_matrixes[[c]]))) != 0){
collapsed = FALSE
}
# extract data from loxcode experiment
if (collapsed == FALSE) {
for (i in 1:length(x@samples)) {
curr_sample = x@samples[[i]]
metadata = curr_sample@meta
names(metadata) <- capitalize(names(metadata))
if (name(curr_sample) %in% names(x@count_matrixes[[c]])){
temp=data.frame("Sample_Name" = name(curr_sample),
"Alias" = x@alias[[c]]$alias[[match(name(curr_sample), x@alias[[c]]$sample_name)]],
metadata,
"Barcode_Count" = length(curr_sample@decode@data$count),
"Invalid_Count" = length(curr_sample@decode@data$count) - sum(curr_sample@decode@data$is_valid, na.rm=FALSE),
"Number_of_Reads" = curr_sample@decode_stats$tot_reads,
"Max_Complexity" = max(na.omit(curr_sample@decode@data$dist_orig)),
"Consensus_Filtered" = curr_sample@decode_stats$consensus_filtered,
"Percent_Filtered" = round(100*curr_sample@decode_stats$consensus_filtered/curr_sample@decode_stats$tot_reads,2),
stringsAsFactors = FALSE)
d = plyr::rbind.fill(d,temp)
}
}
}
else {
counts = x@count_matrixes[[c]]
all_codes = x@code_sets[["all_codes"]]
invalid_codes = x@code_sets[["invalid_codes"]]
meta = get_collapsed_meta(x, c)
for (i in 1:ncol(counts)) {
curr = names(counts)[i]
barcodes = row.names(counts[which (counts[[curr]]>0), ])
metadata = meta[curr, ]
names(metadata) <- capitalize(names(meta))
temp = data.frame("Sample_Name" = curr,
"Alias" = x@alias[[c]]$alias[[match(curr, x@alias[[c]]$sample_name)]],
metadata,
"Barcode_Count" = sum(counts[,i]>0),
"Invalid_Count" = sum(barcodes %in% row.names(invalid_codes)),
"Number_of_Reads" = round(sum(counts[,i]), 0),
"Max_Complexity" = max(all_codes[which (all_codes$code %in% barcodes), ]$dist_orig, na.rm=TRUE),
stringsAsFactors = FALSE)
d = plyr::rbind.fill(d, temp)
}
names(d)[names(d) == "metadata"] = names(meta)
}
return(d)
})
#' Summary table v2
#'
#' @param lox loxcode_experiment object
#' @param s sample set
#' @return table of sample information
#' @export
setGeneric("summary_table", function(lox, s="all_samples") {standardGeneric("summary_table")})
setMethod("summary_table", "loxcode_experiment", function(lox, s="all_samples") {
counts = lox@count_matrixes[[s]]
aliases = lox@alias[[s]]
all_codes = lox@code_sets[["all_codes"]]
invalid_codes = lox@code_sets[["invalid_codes"]]
meta = get_collapsed_meta(lox, s)
d = data.frame()
for (i in 1:ncol(counts)) {
curr = names(counts)[i]
barcodes = row.names(counts[which (counts[[curr]]>0), ])
metadata = meta[curr, ]
if (curr %in% names(lox@samples)) {
stats = lox@samples[[curr]]@decode_stats
nreads = stats$tot_reads
cfiltered = stats$consensus_filtered
pfiltered = round(100 * cfiltered / nreads, 2)
} else {
nreads = NA
cfiltered = round(sum(counts[,i]), 0)
pfiltered = NA
}
temp = data.frame("Sample_Name" = curr,
"Alias" = aliases$alias[match(curr, aliases$sample_name)],
metadata,
"Barcode_Count" = sum(counts[,i]>0),
"Invalid_Count" = sum(barcodes %in% row.names(invalid_codes)),
"Number_of_Reads" = nreads,
"Max_Complexity" = max(all_codes[which (all_codes$code %in% barcodes), ]$dist_orig, na.rm=TRUE),
"Consensus_Filtered" = cfiltered,
"Percent_Filtered" = pfiltered,
stringsAsFactors = FALSE)
d = plyr::rbind.fill(d, temp)
}
d$Max_Complexity = replace(d$Max_Complexity, is.infinite(d$Max_Complexity), 0)
return(d)
})
#' codeset table
#'
#' Tabulates the codes in a codeset
#' @param x loxcode_experiment object
#' @param n name of codeset to display
#' @return dataframe of codeset information
#' @export
setGeneric("codeset_table", function(x, n) {standardGeneric("codeset_table")})
setMethod("codeset_table", "loxcode_experiment", function(x, n){
table = x@code_sets[[n]]
names(table) <- capitalize(names(table))
return(table)
})
#' experiments table
#'
#' Presents a list of loxcode_experiments in table form
#' @param exp list of loxcode_experiment objects
#' @return a data.frame containing the information
#' @export
setGeneric("exp_table", function(exp) {standardGeneric("exp_table")})
setMethod("exp_table", "list", function(exp) {
d = data.frame()
if (length(exp)>0) {
for (i in 1:length(exp)){
directories = ""
curr = exp[[i]]
for (dir in curr@dir) {
directories = paste(directories, dir)
}
temp = data.frame("Experiment_Name"=curr@name,
"Directory"=directories,
"Samples"=length(curr@samples),
"Code_Sets"=length(curr@code_sets),
"Sample_Sets"=length(curr@count_matrixes),
stringsAsFactors=FALSE)
d = plyr::rbind.fill(d, temp)
}
}
return(d)
})
#' Code frequency table
#'
#' Creates a data.frame of the frequency for each size and complexity combination
#' @param x loxcode_experiment object
#' @param s sample set of independent samples
#' @param c code_set
#' @return data.frame of code frequencies
#' @export
setGeneric("code_freq_table", function(x, s="all_samples", c="all_codes") {standardGeneric("code_freq_table")})
setMethod("code_freq_table", "loxcode_experiment", function(x, s="all_samples", c="all_codes") {
codeset = x@code_sets[[c]]
counts = x@count_matrixes[[s]]
Y = data.frame()
if (!length(counts)) { return() } # prevents LoxCodeR from crashing
#if (has_warning(max(codeset$dist_orig, na.rm=TRUE))) {print("crash prevented"); return() }
# create a column in code sets recording frequency of barcodes
index = match(row.names(counts),codeset$code)
index = subset(index, !is.na(index))
codeset$rep[index] = rowSums(counts>0)
# create a data.frame with repetition frequencies for each size and complexity
for(i in c(1:max(codeset$size, na.rm=TRUE))) for(j in c(1:max(codeset$dist_orig, na.rm=TRUE))){
y=data.frame(size=i,dist_orig=j)
z=as.data.frame(table(subset(codeset, size==i & dist_orig==j)$rep),stringsAsFactors = FALSE)
if(nrow(z)==0) next;
n=z$Var1;
z=as.data.frame(z[,-1]) ##to-do: find a way to order count entries
y=cbind(y,sum(z),t(z))
names(y)=c("size","dist_orig","radius",n)
Y=plyr::rbind.fill(Y,y);
}
Y[is.na(Y)]=0
return(Y)
})
#' Filtered Code freq table
#'
#' Data.frame of filtered codes specified by user parameters
#' @param x loxcode_experiment object
#' @param s independent sample set
#' @param c code set
#' @param t tolerance threshold for repeated proportions
#' @param m maximum repeats tolerated
#' @return scatterpie of filtered codes
#' @export
setGeneric("filtered_codes_table", function(x, s="all_samples", c="all_codes", t=0.05, m=2) {standardGeneric("filtered_codes_table")})
setMethod("filtered_codes_table", "loxcode_experiment", function(x, s="all_samples", c="all_codes", t=0.05, m=2) {
# data-frame of repetition frequencies
Y = code_freq_table(x, s, c)
if (!length(x@code_sets[[c]]) | !length(x@count_matrixes[[s]])) { print("errorrrr"); return() }
YY = data.frame()
total = max(as.numeric(names(Y[,!names(Y)%in%c("size", "dist_orig", "radius")])))
for (i in 1:nrow(Y)) {
row = Y[i,]
proportion = sum(row[as.character(seq(m, total))] / sum(row[as.character(seq(1, total))]))
if (proportion*100<=t){
YY = plyr::rbind.fill(YY, row)
}
}
return(YY)
})
#' Code frequency pie
#'
#' Display the frequency proportions of codes of every permutation of size and complexity
#'
#' @param x loxcode_experiment object
#' @param s sample set of independent samples
#' @param c code_set
#' @return scatterpie plot
#' @export
setGeneric("code_frequency_pie", function(x, s="all_samples", c="all_codes") {standardGeneric("code_frequency_pie")})
setMethod("code_frequency_pie", "loxcode_experiment", function(x, s="all_samples", c="all_codes") {
Y = code_freq_table(x, s, c)
# scatterpie plot
g <- ggplot(Y)+
geom_scatterpie(aes(x=(size), y=(dist_orig), r=log10(radius)/10),data=Y,cols=c(4:ncol(Y)),size=0.1)+
scale_x_continuous(breaks=c(1,3,5,7,9,11,13))+scale_y_continuous(breaks=c(1:10))+
theme_bw()+scale_fill_discrete(name="Repeats")
return(g)
})
#' Filtered code frequency pie
#'
#' Filters out codes that have an overlap between independent samples
#' @param x loxcode_experiment object
#' @param s independent sample set
#' @param c code set
#' @param t tolerance threshold for repeated proportions
#' @param m maximum repeats tolerated
#' @return scatterpie of filtered codes
#' @export
setGeneric("filtered_codes_pie", function(x, s="all_samples", c="all_codes", t=0.05, m=2) {standardGeneric("filtered_codes_pie")})
setMethod("filtered_codes_pie", "loxcode_experiment", function(x, s="all_samples", c="all_codes", t=0.05, m=2) {
Y = code_freq_table(x, s, c)
YY = filtered_codes_table(x, s, c, t, m)
g <- ggplot(YY)+
geom_scatterpie(aes(x=(size), y=(dist_orig), r=log10(radius)/5),data=YY,cols=c(4:ncol(YY)),size=0.1)+
scale_x_continuous(breaks=c(1,3,5,7,9,11,13))+scale_y_continuous(breaks=c(1:max(YY$dist_orig)+1))+
theme_bw()+scale_fill_discrete(name="Repeats")
return(g)
})
#' Get sample alias
#'
#' @param lox loxcode_experiment object
#' @param set sample set name
#' @param name sample_name
#' @return sample alias
#' @export
setGeneric("get_alias", function(lox, set, name) {standardGeneric("get_alias")})
setMethod("get_alias", "loxcode_experiment", function(lox, set, name) {
aliases = lox@alias[[set]]
index = match(name, aliases$sample_name)
alias = NA
if (!is.na(index)) {
alias = aliases$alias[[index]]
}
return (alias)
})
#' Get sample name from alias
#'
#' @param lox loxcode_experiment object
#' @param set sample set name
#' @param name alias
#' @return sample alias
#' @export
setGeneric("get_samplename", function(lox, set, name) {standardGeneric("get_samplename")})
setMethod("get_samplename", "loxcode_experiment", function(lox, set, name) {
aliases = lox@alias[[set]]
index = match(name, aliases$alias)
samplename = NA
if (!is.na(index)) {
samplename = aliases$sample_name[[index]]
}
return (samplename)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.