# Install Package: Ctrl + Shift + B
# devtools::document()
#' Print a short overview of the package's main functions.
#'
#' Writes one line per function (what it takes and what it gives) to the
#' console.
#'
#' @return `NULL`, invisibly; called for its printed output.
deepInfo <- function() {
  # leading and trailing empty strings reproduce the blank first line and
  # the final newline of the printed overview
  overview <- c(
    "",
    "deepFilter |||takes: attribute specifics |||gives: cell line table |",
    "deepResult |||takes: deepFilter output |||gives: data about cell lines |",
    "deepPCA |||takes: deepResults output |||gives: PCA |",
    ""
  )
  cat(overview, sep = "\n")
}
#' Filter the depmap annotation file for cell lines of interest.
#'
#' Every text argument is matched with `grepl()` against the column of
#' `dmMeta` that carries the same name, so use `^` and `$` to anchor the start
#' and end of a term if need be. Several terms may be passed as a vector for
#' one argument; a cell line is kept if it matches any of them. Numeric
#' arguments are applied as thresholds on their column. Arguments left at
#' their default do not restrict the result, with one exception: the age
#' range is always applied, so cell lines without a recorded age are dropped.
#'
#' @param DepMap_ID e.g. "ACH-000012"
#' @param stripped_cell_line_name e.g. "HCC827"
#' @param CCLE_Name e.g. "HCC827_LUNG"
#' @param alias alias
#' @param COSMIC_ID e.g. "1240146"
#' @param lineage lineage, e.g. "leukemia" or "lung"
#' @param lineage_subtype sublineage, e.g. "AML", "ALL", etc.
#' @param lineage_sub_subtype Subsublineage
#' @param lineage_molecular_subtype lineage molecular subtype
#' @param sex either "Male" or "Female"
#' @param source e.g. "Sanger", "DSMZ"
#' @param Achilles_n_replicates minimum number of replicates
#' @param max_cell_line_NNMD upper bound for the `cell_line_NNMD` column
#'   (a negative number)
#' @param min_cell_line_NNMD lower bound for the `cell_line_NNMD` column
#'   (a negative number); the default `-Inf` applies no lower bound
#' @param culture_type mostly "Adherent" or "Suspension"
#' @param culture_medium culture medium
#' @param cas9_activity minimum Cas9 activity, positive number
#' @param RRID e.g. "CVCL_0001"
#' @param sample_collection_site e.g. "lung"
#' @param primary_or_metastasis either "Primary" or "Metastasis"
#' @param disease disease of the patient, e.g. "Lung Cancer"
#' @param disease_subtype subdisease of the patient
#' @param ageMin number, minimum patient age
#' @param ageMax number, maximum patient age
#' @param Sanger_model_ID e.g. "SIDM01067"
#' @param additional_info additional information
#' @return A data.frame where each row is a cell line that passed all filters,
#'   and each column is a parameter of information about the cell line
#'   (e.g. ID, tissue, etc.).
#' @examples
#' head(deepFilter())
#' a <- deepFilter(lineage_subtype="^AML$", ageMin=60)
deepFilter <- function(DepMap_ID=NA, stripped_cell_line_name=NA, CCLE_Name=NA, alias=NA, COSMIC_ID=NA,
                       lineage=NA, lineage_subtype=NA, lineage_sub_subtype=NA, lineage_molecular_subtype=NA,
                       sex=NA, source=NA, Achilles_n_replicates=NA,
                       max_cell_line_NNMD=NA, min_cell_line_NNMD= -Inf,
                       culture_type=NA, culture_medium=NA, cas9_activity=NA, RRID=NA, sample_collection_site=NA,
                       primary_or_metastasis=NA, disease=NA, disease_subtype=NA, ageMin=0, ageMax=200, Sanger_model_ID=NA, additional_info=NA){
  ## regular-expression filters, keyed by the dmMeta column they are matched
  ## against (a list, so vector-valued arguments cannot shift other entries)
  patterns <- list(
    DepMap_ID=DepMap_ID, stripped_cell_line_name=stripped_cell_line_name,
    CCLE_Name=CCLE_Name, alias=alias, COSMIC_ID=COSMIC_ID,
    lineage=lineage, lineage_subtype=lineage_subtype,
    lineage_sub_subtype=lineage_sub_subtype,
    lineage_molecular_subtype=lineage_molecular_subtype,
    sex=sex, source=source,
    culture_type=culture_type, culture_medium=culture_medium, RRID=RRID,
    sample_collection_site=sample_collection_site,
    primary_or_metastasis=primary_or_metastasis,
    disease=disease, disease_subtype=disease_subtype,
    Sanger_model_ID=Sanger_model_ID, additional_info=additional_info
  )
  ## numeric range filters: column name -> list(lower bound, upper bound)
  ## NOTE(review): assumes the NNMD column of dmMeta is called
  ## "cell_line_NNMD", as in the DepMap sample_info file - confirm
  ranges <- list(
    Achilles_n_replicates=list(Achilles_n_replicates, NA),
    cell_line_NNMD=list(min_cell_line_NNMD, max_cell_line_NNMD),
    cas9_activity=list(cas9_activity, NA)
  )

  ## look a column up by name and fail loudly if it does not exist, rather
  ## than silently filtering on a neighbouring column
  get_column <- function(df, name){
    if(!name %in% colnames(df)){
      stop("dmMeta has no column named '", name, "' to filter on", call.=FALSE)
    }
    df[[name]]
  }
  ## turn a user-supplied bound into a single number; NA means "no bound"
  as_bound <- function(value, name){
    if(length(value) == 0 || is.na(value[1])) return(NA_real_)
    bound <- suppressWarnings(as.numeric(value[1]))
    if(is.na(bound)){
      stop("the bound for '", name, "' must be a single number", call.=FALSE)
    }
    bound
  }

  ## filter by age; which() drops rows with unknown age instead of turning
  ## them into all-NA rows
  in_age_range <- which(dmMeta$age >= ageMin & dmMeta$age <= ageMax)
  output <- as.data.frame(dmMeta[in_age_range, ])

  ## apply the numeric thresholds; infinite bounds mean "no restriction", so
  ## rows with a missing value are only dropped when a bound was really given
  for(name in names(ranges)){
    lower <- as_bound(ranges[[name]][[1]], name)
    upper <- as_bound(ranges[[name]][[2]], name)
    if(!is.na(lower) && lower > -Inf){
      output <- output[which(get_column(output, name) >= lower), , drop=FALSE]
    }
    if(!is.na(upper) && upper < Inf){
      output <- output[which(get_column(output, name) <= upper), , drop=FALSE]
    }
  }

  ## apply the text filters, one column at a time
  for(name in names(patterns)){
    terms <- patterns[[name]]
    terms <- terms[!is.na(terms)]
    if(length(terms) == 0) next
    ## several terms for one column are combined as alternatives
    pattern <- paste(terms, collapse="|")
    output <- output[grepl(pattern, get_column(output, name)), , drop=FALSE]
  }
  output
}
#' Get the results for a certain filter.
#'
#' Merges the selected cell lines with a dataset by `DepMap_ID` and transposes
#' the result, so that cell lines become columns.
#'
#' @param cellLines Any table with DepMap_IDs for cell lines (ACH-000002 etc.) in a column named `DepMap_ID` that is its 1st column, and cell line names in its 2nd column. Ideally generated by deepFilter().
#' @param dataset Any table with a `DepMap_ID` column followed by one column per measured feature. Inbuilt are dmExpr (default), dmDep, dmDrug and dmProt.
#' @return A data.frame with one row per feature column of `dataset` (e.g. one row per gene) and one column per cell line, named after the 2nd column of `cellLines`. If the 2nd column name of `dataset` ends in ")" (names of the form "SYMBOL (EntrezID)"), the text before " (" becomes the row name and the ID inside the parentheses is added as a leading `EntrezID` column.
#' @examples
#' a <- deepFilter(lineage="leukemia", ageMin=60)
#' b <- deepResult(a, dmDep)
deepResult <- function(cellLines, dataset=dmExpr){
  ## as.data.frame() so that the base-R indexing below behaves the same no
  ## matter which table class merge() returned
  merged <- as.data.frame(merge(cellLines[,1:2], dataset, by="DepMap_ID"))
  ## everything after the ID and name columns is data; drop=FALSE keeps the
  ## feature name even when the dataset has a single data column
  values <- merged[, -(1:2), drop=FALSE]
  output <- as.data.frame(t(values))
  colnames(output) <- as.character(merged[[2]])
  ## for datasets with "SYMBOL (EntrezID)" column names (dmExpr and dmDep),
  ## change rownames and add an EntrezID column
  if( isTRUE(grepl( "\\)$", colnames(dataset)[2] )) ){
    entrez <- gsub("^.*\\(", "", gsub("\\)", "", rownames(output)))
    output <- cbind(EntrezID=as.character(entrez), output)
    rownames(output) <- as.character(gsub(" \\(.*\\)$", "", rownames(output)))
  }
  output
}
#' Generate a PCA plot from DepMap data
#'
#' Runs a principal component analysis across cell lines and plots the first
#' two components. Rows of `deepResult` that contain missing values are
#' removed before the analysis.
#'
#' @param deepResult A table with one column per cell line (column names are matched against `stripped_cell_line_name` in the metadata) and one row per feature. A leading `EntrezID` column is removed. Ideally generated by deepResult().
#' @param groupBy name(s) of metadata column(s) to attach to the plot data; points are coloured by the last one. Leave as NA for an uncoloured plot.
#' @param size dot size (also used as text size of the labels)
#' @param deepFilter table with metadata about cell lines. Can be generated with deepFilter(), or leave as NA to use dmMeta
#' @param labels if TRUE, label every point with its cell line name
#' @return ggplot2 object. Legend title can be set with ggplot2::labs(color=...). Axis labels can be set with ggplot2::xlab(...) and ylab(...)
#' @examples
#' df1 <- deepFilter(disease="Leukemia")
#' df2 <- deepResult(df1)
#' pca1 <- deepPCA(df2, "lineage_subtype")
deepPCA <- function(deepResult, groupBy=NA, size=5, deepFilter=NA, labels=FALSE){
  # drop the EntrezID annotation column, if present
  if(identical(colnames(deepResult)[1], "EntrezID")){
    deepResult <- deepResult[, -1, drop=FALSE]
  }
  if(ncol(deepResult) < 2){
    stop("deepPCA needs at least two cell lines (columns) to plot PC1 and PC2", call.=FALSE)
  }

  # pick the metadata table; only a missing / scalar-NA deepFilter falls back
  # to dmMeta (is.na() on a whole table is not a valid `if` condition)
  use_default <- is.null(deepFilter) ||
    (is.atomic(deepFilter) && length(deepFilter) == 1L && is.na(deepFilter))
  meta <- as.data.frame(if(use_default) dmMeta else deepFilter)

  # get the desired groupBy columns, with rows aligned to the columns of
  # deepResult; cell lines without metadata get NA
  groupBy <- groupBy[!is.na(groupBy)]
  has_group <- length(groupBy) > 0
  if(has_group){
    unknown <- setdiff(c("stripped_cell_line_name", groupBy), colnames(meta))
    if(length(unknown) > 0){
      stop("metadata has no column(s): ", paste(unknown, collapse=", "), call.=FALSE)
    }
    rows <- match(colnames(deepResult), meta$stripped_cell_line_name)
    meta <- meta[rows, groupBy, drop=FALSE]
  }

  # perform PCA on data, then combine with meta data about the cell lines
  pca1 <- stats::prcomp( t(stats::na.omit(deepResult)), center=TRUE )
  pca2 <- cbind( as.data.frame(pca1$x[,1:2]), cells=rownames(pca1$x) )
  if(has_group){
    pca2 <- cbind(pca2, meta)
  }

  # get PC variance percentages of the first two components
  variance <- pca1$sdev^2
  percs <- 100*round((variance/sum(variance))[1:2], 3)

  # plot; the colour column is looked up by name, so non-syntactic column
  # names work as well
  if(has_group){
    colorBy <- groupBy[length(groupBy)]
    mapping <- ggplot2::aes(x=PC1, y=PC2, color=.data[[colorBy]])
  }else{
    mapping <- ggplot2::aes(x=PC1, y=PC2)
  }
  plot1 <- ggplot2::ggplot(pca2, mapping) +
    ggplot2::geom_point(size=size) +
    ggplot2::xlab(paste0("PC1 (", percs[1], " %)")) +
    ggplot2::ylab(paste0("PC2 (", percs[2], " %)"))
  if(has_group){
    plot1 <- plot1 + ggplot2::labs(color=colorBy) # legend title
  }
  if(isTRUE(labels)){
    plot1 <- plot1 +
      ggrepel::geom_label_repel(data = pca2, ggplot2::aes(label=cells), min.segment.length = 0.25, force = 6,
                                point.padding = 0.5, size=size, segment.size=0.5, nudge_x=-5, fill="white")
  }
  plot1
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.