#' Generates filtered sequence name list seperated on group
#'
#' \code{PAC_filtsep} Group seperated filtered name list
#'
#' Given a PAC object the function will extract sequences within the
#' given filter within a given group.
#'
#' @family PAC analysis
#'
#' @seealso \url{https://github.com/Danis102} for updates on the current
#' package.
#'
#' @param PAC PAC-list object containing an Pheno data.frame with samples as row
#' names and a Counts table with raw counts or normalized counts table
#' containing for example counts per million (cpm; generated by
#' \code{PAC_norm}).
#'
#' @param threshold Integer giving the threshold in counts PAC$Counts or
#' normalized counts (table in PAC$norm) that needs to be reached for a
#' sequence to be included (default=10).
#'
#' @param coverage Integer giving the percent of independent samples of each
#' group that needs to reach the threshold for a sequence to be included
#' (default=100).
#'
#' @param norm Character specifying if filtering should be done using "counts",
#' "cpm" or another normalized data table in PAC$norm (default="counts").
#'
#' @param pheno_target (optional) List with:
#' 1st object being a character vector of target column in Pheno,
#' 2nd object being a character vector of the target group(s)
#' in the target Pheno column (1st object).
#' (default=NULL)
#'
#' @param output Specifies the output format. If output="sequence" (default),
#' then a data.frame is returned where each column contains the sequences
#' names that passed the filter for a specific group specified in
#' pheno_target. If output="binary", then the resulting data.frame will be
#' converted into a binary (hit=1, no hit=0) data.frame. See
#' \code{\link{filtsep_bin}}.
#'
#' @return A data.frame (see output for details).
#'
#'
#' @examples
#'
#' load(system.file("extdata", "drosophila_sRNA_pac_filt_anno.Rdata",
#' package = "seqpac", mustWork = TRUE))
#'
#' ## Keep sequences with 5 counts (threshold) in 100% (coverage) of
#' ## samples in a group:
#' # Use PAC_filtsep to find sequences
#' filtsep <- PAC_filtsep(pac, norm="counts", threshold=5,
#' coverage=100, pheno_target= list("stage"))
#'
#' # Filter by unique sequences passing filtsep
#' filtsep <- unique(do.call("c", as.list(filtsep)))
#' pac_filt <- PAC_filter(pac, subset_only = TRUE, anno_target= filtsep)
#'
#' # Find overlap
#' olap <- reshape2::melt(filtsep,
#' measure.vars = c("Stage1", "Stage3", "Stage5"),
#' na.rm=TRUE)
#'
#' ## Upset plot using the UpSetR package
#' # (when output="binary" PAC_filtsep uses filtsep_bin for binary conversion
#' # Use PAC_filtsep with binary output
#' filtsep_bin <- PAC_filtsep(pac, norm="counts", threshold=5,
#' coverage=100, pheno_target= list("stage"),
#' output="binary")
#'
#' # Plot Wenn diagram or UpSetR
#' #
#' # plot(venneuler::venneuler(data.frame(olap[,2], olap[,1])))
#' #
#' # UpSetR::upset(filtsep_bin, sets = colnames(filtsep_bin),
#' # mb.ratio = c(0.55, 0.45), order.by = "freq", keep.order=TRUE)
#'
#'
#'
#' @export
PAC_filtsep <- function(PAC, norm="counts", threshold=10, coverage=100,
pheno_target=NULL, output="sequence"){
## Check S4
if(isS4(PAC)){
tp <- "S4"
PAC <- as(PAC, "list")
}else{
tp <- "S3"
}
## Extract data
if(norm=="counts"){
data <- PAC$Counts
}else{
if(is.null(PAC$norm[[norm]])){
stop("\nThere is no object called '",
norm, "' in the norm list.",
"\n (Hint: Did you forget to normalize the data using for",
"\n example PAC_norm, or would you rather run the function",
"\n on raw counts using norm='counts'?)")
}
data <- PAC$norm[[norm]]
}
## Subset dataset
pheno <- PAC$Pheno
pheno$All <- "All"
if(is.null(pheno_target)){
warning(
"No grouping factor was specified with pheno_target.",
"\nCalculations are based on all samples.")
pheno_target <- list("All","All")
}else{
if(length(pheno_target)==1){
pheno_target[[2]] <- unique(pheno[, pheno_target[[1]]])
}else{
if(is.null(pheno_target[[2]])){
pheno_target[[2]] <- unique(pheno[, pheno_target[[1]]])
}}}
indx <- pheno[, pheno_target[[1]]] %in% pheno_target[[2]]
pheno <- pheno[indx,]
data <- data[,indx]
stopifnot(identical(rownames(pheno), colnames(data)))
## Extract filtered anno sequence names
start_lst <- as.list(pheno_target[[2]])
names(start_lst) <- pheno_target[[2]]
sub_data_lst <- lapply(start_lst, function(x){
y <- data[, pheno[, pheno_target[[1]]] == x]
logi <- data.frame(rowSums(y >= threshold)) >= round(ncol(y)*(coverage*0.01))
return(rownames(y)[logi])
})
nmax <- max(unlist(lapply(sub_data_lst, length)))
df <- do.call("cbind", lapply(sub_data_lst, function(x){
fix <- as.character(c(x, rep(NA, times=nmax-length(x))))
return(fix)}
))
df <- as.data.frame(df, stringsAsFactors=FALSE)
if(output=="binary"){
return(filtsep_bin(df))
}else{
return(df)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.