fixSeqWareTSV <- function(t.df, valid.dt) {
t.df.colnames <- colnames(t.df)
# Library column is duplicated for some reason in Run Report TSV files. This removes is.
# Note that if duplicated column names with different data exist, the second one will be ignored.
t.df.colnames <- unique(t.df.colnames)
# Create a data table for each column
dt.list <- lapply(t.df.colnames, function(x) {
conversion <- valid.dt[file.name == x, ]
data <- t.df[[x]]
if (nrow(conversion) != 1) {
stop(
paste(
"Field name",
x,
"cannot be unambiguously parsed. It is found",
nrow(conversion),
"times in the annotation file. Expected 1."
)
)
}
# Split Read Length from a character column into two numeric columns
if (conversion$app.name == "#Split_Read1_Read2") {
read.length <- sapply(data, function(x)
strsplit(x, ",")[[1]])
data <-
as.data.table(list(
`R1 Length` = as.numeric(read.length[1, ]),
`R2 Length` = as.numeric(read.length[2, ])
))
return(data)
}
# Split insert size and SD from a character column into two numeric columns
if (conversion$app.name == "#Split_Mean_SD") {
in.sd <- gsub("[\\(\\)]", "", data)
in.sd <- sapply(in.sd, function(x)
strsplit(x, " ")[[1]])
data <-
as.data.table(list(
`Insert Mean` = as.numeric(in.sd[1, ]),
`Insert Stddev` = as.numeric(in.sd[2, ])
))
return(data)
}
# If it is a numeric type, only leave numbers and periods
if (conversion$type == "Numeric") {
data <- gsub("[^0-9\\.]", "", data)
data <- as.numeric(data)
}
result <- list()
# The column is renamed to canonical name
result[[conversion$app.name]] <- data
return(as.data.table(result))
})
result.dt <- as.data.table(dt.list)
# Add Study name
result.dt <- addCustomTSVMetrics(
result.dt,
valid.dt,
"Study",
sapply(strsplit(result.dt$Library, "_"), function(x) x[[1]])
)
# Add On Target Percentage
result.dt <- addCustomTSVMetrics(
result.dt,
valid.dt,
"On Target Percentage",
result.dt$`Percent Mapped on Target` * result.dt$`Map Percent` / 100
)
# Field keeps track if a library has been selected by user
result.dt <- addCustomTSVMetrics(
result.dt,
valid.dt,
"plotly_library_selected",
FALSE
)
# Field that must be unique for each library. For illumina that is: Library name + lane + barcode
result.dt <- addCustomTSVMetrics(
result.dt,
valid.dt,
"plotly_unique_key",
paste(result.dt$Library, result.dt$Lane, result.dt$Barcode, sep = "_")
)
# Add any missing columns as all NA (Columns from the valid.dt starting with # are ignored, as it is a flag to split column)
missing.columns <- setdiff(valid.dt$app.name, colnames(result.dt))
remove.flag.columns <- grepl('^[^#]', missing.columns) # Columns that do not start with flag
missing.columns <- missing.columns[remove.flag.columns]
if(!identical(missing.columns, character(0))) {
# https://stackoverflow.com/questions/11745169/dynamic-column-names-in-data-table
result.dt[, (missing.columns) := NA]
}
return(result.dt)
}
# Add fields not present in the original TSV file
addCustomTSVMetrics <- function(dt, valid.dt, field_name, field_values) {
if (field_name %in% colnames(dt)) {
stop(
paste(
"Custom field name cannot be added as it already exists:",
field_name
)
)
}
if (!(field_name %in% valid.dt$app.name)) {
stop(
paste(
"Custom field not specified in annotation file:",
field_name
)
)
}
set(dt, j = field_name, value = field_values)
}
createLong <- function(t.df, all_plots, info_columns) {
epic.dt.long <-
melt(
t.df,
id.vars = intersect(info_columns, colnames(t.df)),
variable.name = "Type",
value.name = "Value"
)
# Order the different metrics, so that they will always be displayed in the same order
# Only present metrics can be included in factor levels, as data.table::split bugs out otherwise
set(
epic.dt.long,
j = "Type",
value = factor(
epic.dt.long$Type,
levels = intersect(all_plots, epic.dt.long$Type)
)
)
setorder(epic.dt.long, Type)
return(epic.dt.long)
}
# This function loads the file and adds a column with the path
# This is necessary as the TSV file itself does not tell you which run it came from
# Any warning is thrown as an error, as fread tries its best to read even nonstandard files
readSeqWareTSV <- function(path) {
tryCatch({
dt <-
fread(
path,
sep = "\t",
header = TRUE,
na.strings = c("NA", "na")
)
if ("Run Name" %in% colnames(dt)) {
stop("Column 'Run Name' is used internally by the App and cannot be present in Run Report")
}
set(dt, j = "Run Name", value = factor(rep(path, nrow(dt))))
}, warning = function(w) {
# This is a crutch to fix a bug in data.table. If fread causes a warning, it is caught, but the function is left in a failed state.
# The next valid call to fread will fail again, as the function has to reset itself. The next valid call will work fine.
# I am manually making a valid call to fread to reset it
# Issue is at https://github.com/Rdatatable/data.table/issues/2904
tryCatch({
fread("echo bug, fix")
}, warning = function(w) {
# Squash the warning message produced by resetting fread
})
stop(conditionMessage(w))
})
}
# The data table that will be used throughout the app, starting from the path to the Run Report
createAppDT <- function(path, valid.dt) {
current.run <- readSeqWareTSV(path)
current.run <- fixSeqWareTSV(current.run, valid.dt)
return(current.run)
}
generateRunReportURL <- function(run_alias) {
paste("https://www.hpc.oicr.on.ca/archive/web/runReports/", run_alias, "/", run_alias, "_report.html", sep = "")
}
generateMisoRunURL <- function(run_alias) {
paste("https://miso.oicr.on.ca/miso/run/alias/", run_alias, sep = "")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.