# Install Package: Ctrl + Shift + B
# devtools::document()
#' create CGDS object
#'
#' Imports:
#' cgdsr
#'
#' @param address string, web address
#' @return cdgs object, and prints a test and the first 2 columns of the cdgs object
#' @details uses cgdsr::CGDS(address)
#' @examples
#' cgds <- setCGbase()
#' @export
setCGbase <- function(address="https://www.cbioportal.org/"){
#==cdgs object==#
cgds <- cgdsr::CGDS("https://www.cbioportal.org/")
#==user info print==#
print( cgdsr::test(cgds) )
print( cgdsr::getCancerStudies(cgds)[,1:2] )
#==output==#
cgds
}
#' get name of a study
#'
#' Imports:
#' cgdsr
#'
#' @param cgds output from the setCGbase() function
#' @param studyNumber integer, index number of the study
#' @return study name, and prints caselist and genetic profiles
#' @details uses cgdsr::getCancerStudies(cgds)
#' @examples
#' cgds <- setCGbase()
#' study <- setCGstudy(cgds, 5)
#' @export
setCGstudy <- function(cgds, studyNumber){
#==get specific study from the object==#
study <- cgdsr::getCancerStudies(cgds)[studyNumber,1]
#==user info print==#
print( cgdsr::getCaseLists(cgds, study)[,2:3] )
print( cgdsr::getGeneticProfiles(cgds, study)[,1:2] )
#==output==#
study
}
#' get clinical data from patients with specific data about genes
#'
#' Imports:
#' cgdsr
#'
#' @param cgds output from the setCGbase() function
#' @param study ouput from the setCGstudy() function
#' @param profile integer, profile name (what kind of data, RNA-Seq, Mutations, ...) obtained from the printout of the setCGstudy() function
#' @param caselist integer, caselist name (which samples) obtained from the printout of the setCGstudy() function
#' @param genes character vector, genes of interest, Symbols, EntrezIds and EnsemblIds work, maybe others too.
#' @return data.frame, clinical data of the patients (depending on what caselist was chosen) with data about the genes of interest (RNA-Seq counts, mutations occurences) depending on what profile was chosen
#' @details profile and caselist should be logically conntected, e.g. the caselist "samples with mutation data" would go along well with the profile "Mutations"
#' @examples
#' cgds <- setCGbase()
#' study <- setCGstudy(cgds, 5)
#' clinic <- setCGclinic(cgds, study, 2, 1, c("ACTA1","HOXA9"))
#' @export
setCGclinic <- function(cgds, study, profile, caselist, genes){
#==get caselist (samples), profile (type of experiment) and the correpsonding data for the genes of interest==#
caselist <- cgdsr::getCaseLists(cgds, study)[caselist,1]
profile <- cgdsr::getGeneticProfiles(cgds, study)[profile,1]
data1 <- cgdsr::getProfileData(cgds, genes, profile, caselist)
#==get clinical data (patient info) for the data==#
clinic <- cgdsr::getClinicalData(cgds,caselist)
clinic <- transform(merge(clinic, data1, by=0, sort=F), row.names=Row.names, Row.names=NULL)
#==output==#
clinic
}
#' get clinical data from patients with specific data about genes
#'
#' Imports:
#' cgdsr,
#' survival,
#' survminer
#'
#' @param clinic output from the setCGclinic() function
#' @param gene character, gene of interest which had to be one of the genes specified in the setCGclinic() function. If you want to split only by genotype, use the splitByCol.
#' @param percsplit integer, how many percentiles are calculated? automactically controls what percentiles are calculated (2: 0th,50th; 3: 0th,33th,66th; 4: 0th,25th,50th,75th)
#' @param percs numbers, which percentiles should be displayed in the survival plot (e.g. 0, 50, 33, 90, etc.)
#' @param splitByCol character, a column name by which the groups should be split (in addition to the gene), e.g. whether or not another gene is mutated
#' @return list with 3 items: [[1]] a survival ggplot, [[2]] the extended clinic data.frame, [[3]] a summary, [[4]] the p value
#' @details uses the OS_MONTHS column of the clinic-data.frame to calculate survival
#' @examples
#' cgds <- setCGbase()
#' study <- setCGstudy(cgds, 5)
#' clinic <- setCGclinic(cgds, study, 2, 1, c("ACTA1","HOXA9"))
#' survival <- setCGsurvival(clinic, "HOXA9")
setCGsurvival <- function(clinic0, gene=NA, percsplit=2, percs=NA, splitByCol=NA){
#==add new columns for survival==#
clinic <- cbind(clinic0, deceased=grepl("DECEASED", clinic0[,"OS_STATUS"]))
#==filter for rows with information about the gene of interest==#
out <- NA
if(!is.na(gene)){
clinic <- clinic[!is.na(clinic[,gene]),]
}
if(nrow(clinic)==0){
list(kaplanMeier=NA, table=NA, summary=NA, pval=1)
}else{
#==determine quantiles and split the table depending on the splitByCol parameter==#
quantiles <- seq(0, 1-1/percsplit, 1/percsplit)
if(!is.na(splitByCol)){
separated <- split.data.frame(clinic, clinic[,splitByCol])
}else{
separated <- list(clinic)
}
#==if a gene of interest if specified, classify patients into quantiles based on their gene expression value==#
if(!is.na(gene)){
df1 <- lapply(separated, function(x) x[,gene,drop=F]) #get a list, each item is a single column of the gene's values
df2 <- lapply(df1, function(x) quantile(x[,1], quantiles, na.rm=T)) #get a list of quantile vectors. Each vector has z items (z=2 for 0th&50th quantile, z=3 for 0th&33th&66th quantile)
df3 <- mapply(function(x,y) lapply(x, function(z) rownames(y)[y>=z]), x=df2, y=df1, SIMPLIFY=F) #get a list where each patient ID is stored under a given quantile
for(i in seq(df3)){
a <- df3[[i]]
for(j in 1:(length(a)-1)){ a[[j]] <- a[[j]][ !a[[j]] %in% a[[j+1]] ] }
df3[[i]] <- a
}
df4 <- lapply(df3, function(z) mapply(function(x,y) data.frame(percentile=y, patient=x), x=z, y=paste0("perc", quantiles*100), SIMPLIFY=F))
df4 <- lapply(df4, function(x) do.call(rbind, x))
df4 <- do.call(rbind, df4)
}else{
df4 <- data.frame(percentile=rep(1,nrow(clinic)), patient=rownames(clinic))
}
#==merge original table with the quantile classification for each patient==#
df5 <- transform(merge(clinic, df4, by.x=0, by.y="patient"), row.names=Row.names, Row.names=NULL)
#==add a column that will be used during plotting==#
if(is.na(splitByCol)){
df5 <- cbind(df5, percentileSplitBy=df5$percentile)
}else{
df5 <- cbind(df5, percentileSplitBy=paste0(df5[,splitByCol], "_", df5$percentile))
}
#==if certain percentiles were defined, keep only those==#
if(!is.na(percs[1])){
df5 <- subset(df5, percentile %in% paste0("perc",percs))
}
#==calculate survival plot==#
surv1 <- survival::survfit(survival::Surv(OS_MONTHS, deceased) ~percentileSplitBy, data=df5)
plot1 <- survminer::ggsurvplot(surv1, data = df5, conf.int = F, pval = T)
if(!is.na(gene) | !is.na(splitByCol)){pval <- survminer::surv_pvalue(surv1, data=df5)$pval}else{pval <- NA}
#==output==#
list(kaplanMeier=plot1, table=df5, summary=surv1, pval=pval)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.