# The exstra_score class
# Holds scores that give the proportion of a read that matches a given repeat
# TODO: inherit exstra_db, saving many rewriting of methods
#' @import data.table
#' @import stringr
#' @import xlsx
#' @import testit
#'
#' @export
is.exstra_score <- function(x) inherits(x, "exstra_score")
#
strs_read_ <- function(file, database, groups.regex = NULL, groups.samples = NULL, this.class = NULL) {
# Load the STR data, and give it the right class
assert("read.strs requires database to be class exstra_db", is.exstra_db(database))
assert("Need groups.samples or groups.regex to be defined", !is.null(groups.samples) || !is.null(groups.regex))
assert("Require exactly one of groups.samples or groups.regex to be defined", xor(is.null(groups.samples), is.null(groups.regex)))
assert("This function must have a class to return", !is.null(this.class))
# Read in data
counts <- read.delim(file)
# Add some info to the data
if(!is.null(groups.regex)) {
# using regex for groups
groups.regex <- rev(groups.regex) # we want the first argument of groups.regex to take priority, this behaviour replaces the old behaviour
if(is.null(names(groups.regex))) {
names(groups.regex) <- groups.regex
}
assert("Require groups to only to have names 'case', 'control' and 'null'.", is.element(names(groups.regex), c("case", "control", "null")))
groups_all <- factor(rep(NA, dim(counts)[1]), levels = names(groups.regex))
for(group.name in names(groups.regex)) {
groups_all[grepl(groups.regex[group.name], counts$sample)] <- group.name
}
}
if(!is.null(groups.samples)) {
# using regex for groups
assert("groups.samples must be a list if used, with vectors with names of at least one of 'case', 'control' or 'null'.",
is.list(groups.samples),
length(groups.samples) > 0,
!is.null(names(groups.samples)),
is.element(names(groups.samples), c("case", "control", "null"))
)
assert("groups.samples does not currently accept multiple of the same names for groups, please put all sample names in the one vector under that name",
length(unique(names(groups.samples))) == length(names(groups.samples)) )
if(length(groups.samples) == 1 && names(groups.samples) == "case") {
# only cases described, so make other samples controls
groups_all <- factor(rep("control", dim(counts)[1]), levels = c("case", "control"))
for(sample in groups.samples$case) {
groups_all[sample == counts$sample] <- "case"
}
} else {
groups_all <- factor(rep(NA, dim(counts)[1]), levels = names(groups.regex))
for(group.name in names(groups.samples)) {
for(sample in groups.samples[[group.name]]) {
groups_all[sample == counts$sample] <- group.name
}
}
}
}
counts$group <- as.factor(groups_all)
return(list(data = counts, db = database))
}
#' @export
exstra_score_new_ <- function(data, db) {
assert("data must be of class data.frame", inherits(data, "data.frame"))
assert("db must be of class exstra_db", inherits(db, "exstra_db"))
data <- data.table(data)
if(!is.element("locus", colnames(data))) {
if(is.element("disease", colnames(data))) {
setnames(data, "disease", "locus")
} else if(is.element("STR", colnames(data))) {
setnames(data, "STR", "locus")
} else {
stop("Can't find the column with locus name")
}
}
setkey(data, locus, sample)
samples <- unique(data[, c("sample", "group"), with = F])
samples$sample <- as.character(samples$sample)
samples$plotname <- NA_character_
samples$sex <- factor(NA, c("male", "female"))
setkey(samples, sample)
structure(list(data = data.table(data), db = db, samples = samples), class = c("exstra_score")) #TODO also inherit from exstra_db
}
#' @export
print.exstra_score <- function(x, ...) {
cat(class(x)[1], " object with ", dim(x$data)[1], " observations of type ", x$db$input_type, "($data),\n",
" for ", dim(x$samples)[1], " samples. ($samples)\n",
" Includes associated STR database of ", dim(x$db$db)[1], " loci. ($db)\n",
sep = "")
}
#' @export
loci.exstra_score <- function(data) {
loci(data$db)
}
#' @export
plot_names.exstra_score <- function(strscore, names) {
# gives the plot names for given sample names
strscore$samples[as.character(names), plotname]
}
#' @export
`plot_names.exstra_score<-` <- function(data, labels) {
assert("data must be of class exstra_db", inherits(data, "exstra_db"))
data$samples[names(labels), plotname := labels]
}
# This function allows square brackets to be used to select out the locus and sample
# BIG TODO: always list by locus, throughout the code!!!
#' @export
`[.exstra_score` <- function(x, loc, samp) {
assert("locus is not the key of x$data", key(x$data)[1] == "locus")
assert("sample is not the key of x$samples", key(x$samples)[1] == "sample")
assert("locus not the key of x$db$db", key(x$db$db)[1] == "locus")
if(!missing(loc)) {
x$db$db <- x$db$db[eval(substitute(loc))]
}
if(!missing(samp)) {
x$samples <- x$samples[eval(substitute(samp))]
}
x$data <- x$data[x$db$db$locus][sample %in% x$samples$sample]
x
}
#' @export
plot.exstra_score <- function(rsc, locus = NULL, sample_col = NULL, refline = TRUE, ylab="Fn(x)", verticals = TRUE,
pch = 19, xlim, ylim = c(0,1), alpha_control = 0.5, alpha_case = NULL,
xlinked = "all", xlinked.safe = TRUE, ...) {
# Plot ECDFs of rep score data
# sample_col should be a named vector, sample names as the name and color as the value
# refline: if TRUE, include reference
# xlinked: For loci on X chromosome, "all" for all samples, "male" and "female" for only that sex
if(is.null(locus)) {
strlocis <- loci(rsc)
} else {
strlocis <- locus
}
if(!missing(xlim)) {
xlim_1 <- xlim
}
assert('In plot.exstra_score(), must have xlinked one of "all", "male", "female" or "both"', xlinked %in% c("all", "male", "female", "both"))
if(xlinked == "both") {
xlinked_loop <- c("male", "female")
} else {
xlinked_loop <- xlinked
}
for(locus.name in strlocis) {
#strrir.trim <- trim.rep_in_read_data(strrir, trimming)
for(xlinked in xlinked_loop) {
main.title <- paste(loci_text_info(rsc$db, locus.name), "score ECDF")
if(xlinked != "all" && grepl("X", str_score_fil$db$db[locus.name]$Gene.location)) {
plot_data <- str_filter_sex(rsc, xlinked, xlinked.safe)$data[locus == locus.name]
main.title <- paste(main.title, paste0(xlinked, 's'))
} else {
plot_data <- rsc$data[locus.name]
}
if(missing(xlim)) {
xlim_1 <- c(0, max(plot_data$mlength))
}
plot(NA,
xlim = xlim_1,
ylim = ylim,
main = main.title,
xlab = "Repeated bases (x)",
ylab = ylab,
cex.main = 1,
...)
grid(col = "grey80")
if(refline) {
abline(v = loci_normal_exp(rsc$db, locus.name), col = c("blue", "red"), lty = 3:4)
}
black_trans <- rgb(0, 0, 0, alpha = alpha_control)
if(is.null(sample_col)) {
sample_col = ifelse(rsc$samples$group == 'case', rgb(1, 0, 0, alpha_case), black_trans)
names(sample_col) <- rsc$samples$sample
}
if(!is.null(alpha_case)) {
sample_col <- add.alpha(sample_col, alpha_case)
}
for(samp in c(setdiff(unique(plot_data$sample), names(sample_col)), intersect(unique(plot_data$sample), names(sample_col)))) {
plot(ecdf(plot_data[sample == samp, rep]), add = T, col = replace(sample_col[samp], is.na(sample_col[samp]), black_trans), verticals = verticals,
pch = pch, ...)
}
}
}
}
# TODO: easy renaming of samples
#' @export
loci_text_info.exstra_score <- function(x, ...) {
loci_text_info(x$db, ...)
}
#' @export
copy.exstra_score <- function(x) {
x$data <- copy(x$data)
x$db <- copy(x$db)
x
}
#TODO length(exstra_score) = number of data points = length(exstra_score$data)
#TODO dim(exstra_score) = c(#loci, #samples)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.