Nothing
#' @title readSegment
#'
#' @param segFile The segment file.
#' @param gisticAmpGenesFile Amplification Genes file generated by GISTIC. Default NULL.
#' @param gisticDelGenesFile Deletion Genes file generated by GISTIC. Default NULL.
#' @param gisticAllLesionsFile Information of all lesions generated by GISTIC. Default NULL.
#' @param gistic.qval The threshold of gistic Q value. Default 0.25.
#' @param verbose Whether to display details in the console. Default TRUE.
#' @param min.seg.size The smallest size of segments. Default 500.
#' @param txdb A TxDb object. i.e., TxDb.Hsapiens.UCSC.hg19.knownGene. Default NULL.
#' @param min.overlap.len The minimum insertion size of segment and gene. Default 50.
#' @param ... ... Other options passed to \code{\link{cna2gene}}.
#'
#' @examples
#' segFile <- system.file("extdata", "CRC_HZ.seg.txt", package = "MesKit")
#' gisticAmpGenesFile <- system.file("extdata", "COREAD_amp_genes.conf_99.txt", package = "MesKit")
#' gisticDelGenesFile <- system.file("extdata", "COREAD_del_genes.conf_99.txt", package = "MesKit")
#' gisticAllLesionsFile <- system.file("extdata", "COREAD_all_lesions.conf_99.txt", package = "MesKit")
#' seg <- readSegment(segFile = segFile,
#' gisticAmpGenesFile = gisticAmpGenesFile,
#' gisticDelGenesFile = gisticDelGenesFile,
#' gisticAllLesionsFile = gisticAllLesionsFile)
#'
#' @return a list of segmentation data frame
#' @importFrom data.table foverlaps as.data.table setkey rbindlist
#' @export readSegment
#'
readSegment <- function(segFile,
gisticAmpGenesFile = NULL,
gisticDelGenesFile = NULL,
gisticAllLesionsFile = NULL,
gistic.qval = 0.25,
min.seg.size = 500,
txdb = NULL,
min.overlap.len = 50,
verbose = TRUE,
...){
seg <- suppressWarnings(data.table::fread(segFile, header=TRUE, sep="\t", stringsAsFactors = FALSE))
## valid seg
seg <- validSeg(seg)
if(!"CopyNumber" %in% colnames(seg)){
if("SegmentMean" %in% colnames(seg)){
seg$SegmentMean <- as.numeric(seg$SegmentMean)
seg$CopyNumber <- round(2^(seg$SegmentMean)*2)
if("Tumor_Sample_Label" %in% colnames(seg)){
seg <- dplyr::select(seg, "Patient_ID", "Tumor_Sample_Barcode", "Tumor_Sample_Label", "Chromosome", "Start_Position", "End_Position", "CopyNumber")
}else{
seg <- dplyr::select(seg, "Patient_ID", "Tumor_Sample_Barcode", "Chromosome", "Start_Position", "End_Position", "CopyNumber")
}
}
else{
stop("Neither copy number nor SegmentMean information was found in seg object.")
}
}
Minor_Major_CN <- data.frame()
if("Minor_CN" %in% colnames(seg) & "Major_CN" %in% colnames(seg)){
Minor_Major_CN <- seg %>%
dplyr::select("Patient_ID",
"Tumor_Sample_Barcode",
"Chromosome",
"Start_Position",
"End_Position",
"Minor_CN",
"Major_CN")
}
seg$CopyNumber <- as.numeric(seg$CopyNumber)
seg$Width <- seg$End_Position - seg$Start_Position
seg <- dplyr::mutate(seg, Type = unlist(lapply(seg$CopyNumber, function(x){
if(x == 0){
return("Deletion")
}
else if(x == 1){
return("Loss")
}
else if(x == 2){
return("Neutral")
}
else if(x == 3){
return("Gain")
}
else if(x >=4){
return("Amplification")
}
}))) %>%
dplyr::filter(.data$Width > min.seg.size)
if("Tumor_Sample_Label" %in% colnames(seg)){
seg <- seg %>%
dplyr::select("Patient_ID", "Tumor_Sample_Barcode", "Tumor_Sample_Label", "Chromosome", "Start_Position", "End_Position", "CopyNumber", "Type") %>%
as.data.table()
}else{
seg <- seg %>%
dplyr::select("Patient_ID", "Tumor_Sample_Barcode", "Chromosome", "Start_Position", "End_Position", "CopyNumber", "Type") %>%
as.data.table()
}
seg$Chromosome <- as.numeric(seg$Chromosome)
seg$Start_Position <- as.numeric(seg$Start_Position)
seg$End_Position <- as.numeric(seg$End_Position)
data.table::setkey(x = seg, "Chromosome", "Start_Position", "End_Position")
gisticCNVgenes <- data.table::data.table()
if(!is.null(gisticAmpGenesFile)){
if(verbose){
cat(paste0('--Processing ', basename(gisticAmpGenesFile), '\n'))
}
gisticAmpGenes <- readGisticGene(gisticAmpGenesFile, "AMP")
gisticCNVgenes <- rbind(gisticCNVgenes, gisticAmpGenes)
}
if(!is.null(gisticDelGenesFile)){
if(verbose){
cat(paste0('--Processing ', basename(gisticDelGenesFile), '\n'))
}
gisticDelGenes <- readGisticGene(gisticDelGenesFile, "Del")
gisticCNVgenes <- rbind(gisticCNVgenes, gisticDelGenes)
}
gisticLesions <- data.table::data.table()
if(!is.null(gisticAllLesionsFile)){
gisticLesions <- as.data.table(readGisticAllLesions(gisticAllLesionsFile, verbose = verbose)) %>%
dplyr::rename("Wide_Peak_Boundaries" = "WPB")
if("Amp" %in% gisticLesions$Gistic_type){
gisticLesions[gisticLesions$Gistic_type == "Amp",]$Gistic_type <- "AMP"
}
}
if(nrow(gisticCNVgenes) != 0){
if(!is.null(gisticAllLesionsFile)){
gisticLesions$ID <- tidyr::unite(gisticLesions,"ID","Cytoband","Wide_Peak_Boundaries","Gistic_type",sep = "_")$ID
gisticCNVgenes$ID <- tidyr::unite(gisticCNVgenes,"ID","Cytoband","Wide_Peak_Boundaries","Gistic_type",sep = "_")$ID
gisticCNVgenes <- merge(gisticLesions,gisticCNVgenes,by = "ID") %>%
dplyr::select("Cytoband.x", "Wide_Peak_Boundaries.x", "Gene", "Gistic_type.x", "Qvalue") %>%
dplyr::rename("Cytoband" = "Cytoband.x", "Wide_Peak_Boundaries" = "Wide_Peak_Boundaries.x",
"Gistic_type" = "Gistic_type.x") %>%
dplyr::filter(.data$Qvalue < gistic.qval) %>% data.table::as.data.table()
}
## combine segment file and gistic results
gisticCNVgenes$Chromosome <- unlist(lapply(gisticCNVgenes$Wide_Peak_Boundaries,function(x)strsplit(x, ":")[[1]][1]))
gisticCNVgenes$loc <- unlist(lapply(gisticCNVgenes$Wide_Peak_Boundaries,function(x)strsplit(x, ":")[[1]][2]))
gisticCNVgenes$Start_Position <- unlist(lapply(gisticCNVgenes$loc,function(x)strsplit(x, "-")[[1]][1]))
gisticCNVgenes$Start_Position <- as.numeric(gisticCNVgenes$Start_Position)
gisticCNVgenes$End_Position <- unlist(lapply(gisticCNVgenes$loc,function(x)strsplit(x, "-")[[1]][2]))
gisticCNVgenes$End_Position <- as.numeric(gisticCNVgenes$End_Position)
gisticCNVgenes$Cytoband_pos <- as.integer(gisticCNVgenes$Start_Position + (gisticCNVgenes$End_Position - gisticCNVgenes$Start_Position)/2)
gisticCNVgenes$Chromosome <- gsub(pattern = 'chr', replacement = '', x = gisticCNVgenes$Chromosome, fixed = TRUE)
gisticCNVgenes$Chromosome <- gsub(pattern = 'X', replacement = '23', x = gisticCNVgenes$Chromosome, fixed = TRUE)
gisticCNVgenes$Chromosome <- gsub(pattern = 'Y', replacement = '24', x = gisticCNVgenes$Chromosome, fixed = TRUE)
gisticCNVgenes <- gisticCNVgenes %>%
dplyr::select(.data$Cytoband,
.data$Cytoband_pos,
.data$Gene,
.data$Gistic_type,
.data$Chromosome,
.data$Start_Position,
.data$End_Position) %>%
dplyr::distinct(.data$Cytoband, .keep_all = TRUE)
gisticCNVgenes$Chromosome <- as.numeric(gisticCNVgenes$Chromosome)
gisticCNVgenes$Start_Position <- as.numeric(gisticCNVgenes$Start_Position)
gisticCNVgenes$End_Position <- as.numeric(gisticCNVgenes$End_Position)
data.table::setkey(x = gisticCNVgenes,"Chromosome", "Start_Position", "End_Position")
mapDat <- data.table::foverlaps(gisticCNVgenes,seg, by.x = c("Chromosome","Start_Position","End_Position")) %>%
na.omit()
if("Tumor_Sample_Label" %in% colnames(mapDat)){
mapDat <- mapDat %>%
dplyr::select("Patient_ID", "Tumor_Sample_Barcode", "Tumor_Sample_Label", "Cytoband", "Cytoband_pos", "Chromosome", "Start_Position", "End_Position", "CopyNumber", "Type", "Gistic_type") %>%
as.data.table() %>%
dplyr::distinct()
}else{
mapDat <- mapDat %>%
dplyr::select("Patient_ID", "Tumor_Sample_Barcode", "Cytoband", "Cytoband_pos", "Chromosome", "Start_Position", "End_Position", "CopyNumber", "Type", "Gistic_type") %>%
dplyr::distinct()
}
seg <- mapDat[(mapDat$Type %in% c("Loss", "Deletion") & mapDat$Gistic_type == "Del")|(mapDat$Type %in% c("Gain", "Amplification") & mapDat$Gistic_type == "AMP"),]
}else if(nrow(gisticLesions) != 0){
gisticLesions$Chromosome <- unlist(lapply(gisticLesions$Wide_Peak_Boundaries,function(x)strsplit(x, ":")[[1]][1]))
gisticLesions$loc <- unlist(lapply(gisticLesions$Wide_Peak_Boundaries,function(x)strsplit(x, ":")[[1]][2]))
gisticLesions$Start_Position <- unlist(lapply(gisticLesions$loc,function(x)strsplit(x, "-")[[1]][1]))
gisticLesions$Start_Position <- as.numeric(gisticLesions$Start_Position)
gisticLesions$End_Position <- unlist(lapply(gisticLesions$loc,function(x)strsplit(x, "-")[[1]][2]))
gisticLesions$End_Position <- as.numeric(gisticLesions$End_Position)
gisticLesions$Cytoband_pos <- as.integer(gisticLesions$Start_Position + (gisticLesions$End_Position - gisticLesions$Start_Position)/2)
gisticLesions$Chromosome <- gsub(pattern = 'chr', replacement = '', x = gisticLesions$Chromosome, fixed = TRUE)
gisticLesions$Chromosome <- gsub(pattern = 'X', replacement = '23', x = gisticLesions$Chromosome, fixed = TRUE)
gisticLesions$Chromosome <- gsub(pattern = 'Y', replacement = '24', x = gisticLesions$Chromosome, fixed = TRUE)
gisticLesions <- dplyr::select(gisticLesions,"Cytoband", "Cytoband_pos", "Gistic_type", "Chromosome", "Start_Position", "End_Position", "Qvalue")
gisticLesions$Chromosome <- as.numeric(gisticLesions$Chromosome)
gisticLesions$Start_Position <- as.numeric(gisticLesions$Start_Position)
gisticLesions$End_Position <- as.numeric(gisticLesions$End_Position)
data.table::setkey(x = gisticLesions, "Chromosome", "Start_Position", "End_Position")
mapDat <- data.table::foverlaps(gisticLesions,seg, by.x = c("Chromosome","Start_Position","End_Position")) %>%
na.omit() %>%
dplyr::filter(.data$Qvalue < gistic.qval)
if("Tumor_Sample_Label" %in% colnames(mapDat)){
mapDat <- mapDat %>%
dplyr::select("Patient_ID", "Tumor_Sample_Barcode", "Tumor_Sample_Label", "Cytoband", "Cytoband_pos", "Chromosome", "Start_Position", "End_Position", "CopyNumber", "Type", "Gistic_type") %>%
dplyr::distinct() %>%
data.table::as.data.table()
}else{
mapDat <- mapDat %>%
dplyr::select("Patient_ID", "Tumor_Sample_Barcode", "Cytoband", "Cytoband_pos", "Chromosome", "Start_Position", "End_Position", "CopyNumber", "Type", "Gistic_type") %>%
dplyr::distinct() %>%
data.table::as.data.table()
}
seg <- mapDat[(mapDat$Type %in% c("Loss", "Deletion") & mapDat$Gistic_type == "Del")|(mapDat$Type %in% c("Gain", "Amplification") & mapDat$Gistic_type == "AMP"),]
}
data.table::setkey(x = seg, "Patient_ID", "Tumor_Sample_Barcode", "Chromosome", "Start_Position", "End_Position")
seg$Chromosome = gsub(pattern = '23', replacement = 'X', x = seg$Chromosome, fixed = TRUE)
seg$Chromosome = gsub(pattern = '24', replacement = 'Y', x = seg$Chromosome, fixed = TRUE)
if(!is.null(txdb)){
seg <- cna2gene(seg, txdb = txdb, min.overlap.len = min.overlap.len, ...)
}
if(nrow(Minor_Major_CN) > 0){
Minor_Major_CN$Chromosome <- as.character(Minor_Major_CN$Chromosome)
seg$Chromosome <- as.character(seg$Chromosome)
seg <- dplyr::left_join(
seg,
Minor_Major_CN,
by = c("Patient_ID",
"Tumor_Sample_Barcode",
"Chromosome",
"Start_Position",
"End_Position")
)
}
result <- split(seg, seg$Patient_ID)
if(length(result) == 1){
return(result[[1]])
}else{
return(result)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.