inst/doc/additonal_examples.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

knitr::opts_knit$set(root.dir = '.')
library(CNVScope)
options(scipen=999)
library(magrittr)

## ----aml_files,eval=F,echo=T--------------------------------------------------
#  if(!dir.exists("extracted_aml_data")){dir.create("extracted_aml_data")}
#  untar("gdc_download_aml.tar.gz",exdir = "./extracted_aml_data")
#  target_files_aml<-list.files(path = "extracted_aml_data",pattern=glob2rx("*NormalVsPrimary.tsv"),recursive=T,full.names = T)
#  print(target_files_aml)

## ----eval=F,echo=T------------------------------------------------------------
#  sample_aggregated_segvals_aml<-formSampleMatrixFromRawGDCData(tcga_files = target_files_aml,format = "TARGET")
#  saveRDS(sample_aggregated_segvals_aml,"aml_sample_matched_input_matrix.rds")

## ----aml_plots, eval=T,echo=T-------------------------------------------------
sample_aggregated_segvals_aml<-readRDS("aml_sample_matched_input_matrix.rds")
invariant_bins<-which((sample_aggregated_segvals_aml[stringr::str_detect(rownames(sample_aggregated_segvals_aml),"chr7"),] %>% t() %>% as.data.frame() %>% sapply(sd))==0)
chr_7_mat<-sample_aggregated_segvals_aml[(stringr::str_detect(rownames(sample_aggregated_segvals_aml),"chr7") & rownames(sample_aggregated_segvals_aml) %in% setdiff(rownames(sample_aggregated_segvals_aml),names(invariant_bins))),] %>% t()

## ----chr7_cor-----------------------------------------------------------------
chr_7_mat %>%  cor(use="pairwise.complete.obs",method="pearson") %>% 
  CNVScope::signedRescale(max_cap=1) %>%
  reshape2::melt()  %>%
  ggplot(aes(x=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start,
             y=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start,
             fill=value)) + geom_raster() +
  theme(axis.text.x = element_blank(),axis.text.y=element_blank(),axis.title = element_blank()) +
  ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1))

## ----breakpoints--------------------------------------------------------------
if((Sys.info()['sysname'] == "Linux" |
Sys.info()['sysname'] == "Windows")&requireNamespace("HiCseg",quietly = T)){
colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"))]
breakpoints<-colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"))] %>% stringr::str_split_fixed(string = .,pattern="_",n=3) %>% as.matrix() %>% .[,2] %>% as.numeric()
breakpoint_labels <- colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"))]
breakpoint_labels} else {
colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"),algorithm = "jointSeg",nb_change_max = round(min(dim(chr_7_mat))/5))$breakpoints_col]
breakpoints<-colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"),algorithm = "jointSeg",nb_change_max = round(min(dim(chr_7_mat))/5))$breakpoints_col] %>% stringr::str_split_fixed(string = .,pattern="_",n=3) %>% as.matrix() %>% .[,2] %>% as.numeric()
breakpoint_labels <- colnames(chr_7_mat)[CNVScope::getAsymmetricBlockIndices(cor(chr_7_mat,use="pairwise.complete.obs"),algorithm = "jointSeg",nb_change_max = round(min(dim(chr_7_mat))/5))$breakpoints_col]
breakpoint_labels  
}

## ----breakpoint_plot----------------------------------------------------------
chr_7_mat %>%  cor(use="pairwise.complete.obs",method="pearson") %>% 
    CNVScope::signedRescale(max_cap=1) %>%
    reshape2::melt()  %>%
    ggplot(aes(x=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start,
               y=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start,
               fill=value)) + geom_raster() +
    theme(axis.text.x = element_text(angle=90, hjust=1),axis.text.y=element_blank(),axis.title = element_blank()) +
    scale_x_continuous(breaks=breakpoints,labels=breakpoint_labels) +
    ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1))


## ----probdist-----------------------------------------------------------------
if(requireNamespace("smoothie",quietly=T)){
chr_7_probdist <- CNVScope::calcCNVKernelProbDist(cor(chr_7_mat,use="pairwise.complete.obs"),parallel=F)$percentile_matrix
js_breakpoints<-jointseg::jointSeg(chr_7_probdist,K=20)$bestBkp
js_breakpoint_labels<-colnames(chr_7_mat)[js_breakpoints]
} else{
  print("Please install smoothie in order to run this example.")
}

## ----plot_probdist------------------------------------------------------------
if(requireNamespace("smoothie",quietly=T)){
chr_7_probdist %>%  
#  CNVScope::signedRescale(max_cap=1) %>%
  reshape2::melt()  %>%
  ggplot(aes(x=Var1,
             y=Var2,
             fill=value)) + geom_tile() +
#  theme(axis.title = element_blank()) + #axis.text.x = element_blank(),axis.text.y=element_blank(),
    theme(axis.text.x = element_text(angle=90, hjust=1),
          axis.text.y = element_text(angle=0, hjust=1)
          ,axis.title = element_blank()) +
#    scale_x_continuous(breaks=js_breakpoints,labels=js_breakpoint_labels) +
#      scale_y_continuous(breaks=js_breakpoints,labels=js_breakpoint_labels) +
      scale_x_continuous(breaks=js_breakpoints,labels=js_breakpoint_labels) +
      scale_y_continuous(breaks=js_breakpoints,labels=js_breakpoint_labels) +

  ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1))
} else{
  print("Please install smoothie in order to run this example.")
}


## ----census_data,eval=F-------------------------------------------------------
#  census_data <- readRDS(system.file("censushg19.rds",package = "CNVScope"))
#  census_data[census_data@seqnames %in% "chr7"] %>% sort() %>% tibble::as_tibble() %>% janitor::clean_names() %>% dplyr::select(seqnames,start,end,gene_symbol,tumour_types_somatic,tumour_types_germline) %>% dplyr::filter(start>60e6,stringr::str_detect(string = tumour_types_somatic,pattern="AML") | stringr::str_detect(string = tumour_types_germline,pattern="AML"))

## ----blca_files,eval=F,echo=T-------------------------------------------------
#  if(!dir.exists("extracted_blca_data")){dir.create("extracted_blca_data")
#  untar("gdc_download_blca.tar.gz",exdir = "./extracted_blca_data")}
#  tcga_files_blca<-list.files(path = "extracted_blca_data",pattern=glob2rx("*.tsv"),recursive=T,full.names = T)
#  print(tcga_files_blca)

## ----eval=F,echo=T------------------------------------------------------------
#  sample_aggregated_segvals_blca<-formSampleMatrixFromRawGDCData(tcga_files = tcga_files_blca,format = "TCGA",parallel=T)
#  saveRDS(sample_aggregated_segvals_blca,"blca_sample_matched_input_matrix.rds")

## ----blca_plots, eval=T,echo=T------------------------------------------------
sample_aggregated_segvals_blca<-readRDS("blca_sample_matched_input_matrix.rds")
invariant_bins<-which((sample_aggregated_segvals_blca[stringr::str_detect(rownames(sample_aggregated_segvals_blca),"chr17"),] %>% t() %>% as.data.frame() %>% sapply(sd))==0)
chr_17_mat<-sample_aggregated_segvals_blca[(stringr::str_detect(rownames(sample_aggregated_segvals_blca),"chr17") & rownames(sample_aggregated_segvals_blca) %in% setdiff(rownames(sample_aggregated_segvals_blca),names(invariant_bins))),] %>% t()

## ----chr17_cor----------------------------------------------------------------
chr_17_mat %>%  cor(use="pairwise.complete.obs",method="pearson") %>% 
  CNVScope::signedRescale(max_cap=1) %>%
  reshape2::melt()  %>%
  ggplot(aes(x=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start,
             y=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start,
             fill=value)) + geom_raster() +
  theme(axis.text.x = element_blank(),axis.text.y=element_blank(),axis.title = element_blank()) +
  ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1))

## ----probdist_chr17-----------------------------------------------------------
if(requireNamespace("smoothie",quietly=T)){
chr_17_probdist <- CNVScope::calcCNVKernelProbDist(cor(chr_17_mat,use="pairwise.complete.obs"),parallel=F)$percentile_matrix
colnames(chr_17_probdist)<-colnames(chr_17_mat)
rownames(chr_17_probdist)<-colnames(chr_17_mat)
chr_17_js_breakpoints<-jointseg::jointSeg(chr_17_probdist,K=40)$bestBkp
chr_17_js_breakpoint_labels<-colnames(cor(chr_17_mat))[chr_17_js_breakpoints]
chr_17_js_breakpoint_labels
} else{
  print("Please install smoothie in order to run this example.")
}


## ----breakpoint_plot_chr17,eval=F---------------------------------------------
#  
#  breakpoint_plot_probdist <- chr_17_probdist %>% #  cor(use="pairwise.complete.obs",method="pearson") %>%
#      CNVScope::signedRescale(max_cap=1) %>%
#      reshape2::melt()  %>%
#    dplyr::mutate(col_pos=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start,
#           row_pos=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start,
#           rel_prob=value) %>%
#      ggplot(aes(x=col_pos,
#                 y=row_pos,
#                 fill=rel_prob)) + geom_raster() +
#      theme(axis.text.x = element_text(angle=90, hjust=1),axis.text.y=element_blank()) +
#      scale_x_continuous(breaks=reshape2::colsplit(chr_17_js_breakpoint_labels,"_",c("chr","start","end"))$start,labels=chr_17_js_breakpoint_labels) +
#      ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) +
#    labs(x="col_pos",y="row_pos",value="Pearson Correlation:") + ggtitle("Chromosome 17 relationship probability") +
#    geom_contour(binwidth = .395, aes(z = value))
#  breakpoint_plot <- chr_17_mat %>%   cor(use="pairwise.complete.obs",method="pearson") %>%
#      CNVScope::signedRescale(max_cap=1) %>%
#      reshape2::melt()  %>%
#    dplyr::mutate(col_pos=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start,
#           row_pos=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start,
#           correlation=value) %>%
#      ggplot(aes(x=col_pos,
#                 y=row_pos,
#                 fill=correlation)) + geom_raster() +
#      theme(axis.text.x = element_text(angle=90, hjust=1),axis.text.y=element_blank()) +
#      scale_x_continuous(breaks=reshape2::colsplit(chr_17_js_breakpoint_labels,"_",c("chr","start","end"))$start,labels=chr_17_js_breakpoint_labels) +
#      ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) +
#    labs(x="col_pos",y="row_pos",value="Pearson Correlation:") + ggtitle("Chromosome 17 linear relationship domains") +
#    geom_contour(binwidth = .395, aes(z = value))
#  breakpoint_plot_corr_diff <- ((chr_17_mat %>%   cor(use="pairwise.complete.obs",method="spearman"))-(chr_17_mat %>%   cor(use="pairwise.complete.obs",method="pearson"))) %>%
#      CNVScope::signedRescale(max_cap=1) %>%
#      reshape2::melt()  %>%
#    dplyr::mutate(col_pos=reshape2::colsplit(Var1,"_",c("chr","start","end"))$start,
#           row_pos=reshape2::colsplit(Var2,"_",c("chr","start","end"))$start,
#           corr_diff=value) %>%
#      ggplot(aes(x=col_pos,
#                 y=row_pos,
#                 fill=corr_diff)) + geom_raster() +
#      theme(axis.text.x = element_text(angle=90, hjust=1),axis.text.y=element_blank()) +
#      scale_x_continuous(breaks=reshape2::colsplit(chr_17_js_breakpoint_labels,"_",c("chr","start","end"))$start,labels=chr_17_js_breakpoint_labels) +
#      ggplot2::scale_fill_gradient2(low = "blue", high = "red", midpoint = 0.5, limits = c(0, 1)) +
#    labs(x="col_pos",y="row_pos",value="Pearson Correlation:") + ggtitle("Chromosome 17 nonlinear (red) relationship regions, inferred by nonlinear-linear correlation difference") +
#    geom_contour(binwidth = .395, aes(z = value))
#  
#  breakpoint_plot
#  breakpoint_plot_probdist
#  breakpoint_plot_corr_diff
#  

## ----plotly_blca,eval=F-------------------------------------------------------
#  library(plotly)
#  breakpoint_plot %>% plotly::ggplotly()

## ----3D_blca,eval=F-----------------------------------------------------------
#  chr_17_long <- chr_17_mat %>%   cor(use="pairwise.complete.obs",method="pearson") %>%
#      CNVScope::signedRescale(max_cap=1) %>%
#      reshape2::melt()  %>%
#    dplyr::mutate(col_pos=as.numeric(reshape2::colsplit(Var1,"_",c("chr","start","end"))$start),
#           row_pos=as.numeric(reshape2::colsplit(Var2,"_",c("chr","start","end"))$start),
#           correlation=value) %>% dplyr::select(col_pos,row_pos,correlation)
#  plot_ly(data = chr_17_long, x=chr_17_long$col_pos,y=chr_17_long$row_pos,z=chr_17_long$correlation,color=c(0,0.5,1),colors=circlize::colorRamp(c("blue","white","red")),intensity=chr_17_long$correlation,type = "mesh3d")

## ----skcm_files,eval=F,echo=T-------------------------------------------------
#  if(!dir.exists("extracted_skcm_data")){dir.create("extracted_skcm_data")}
#  untar("gdc_download_skcm.tar.gz",exdir = "./extracted_skcm_data")
#  tcga_files_skcm<-list.files(path = "extracted_skcm_data",pattern=glob2rx("*.tsv"),recursive=T,full.names = T)
#  print(tcga_files_skcm)

## ----eval=F,echo=T------------------------------------------------------------
#  #ptm <- proc.time()
#  #doMC::registerDoMC()
#  #doParallel::registerDoParallel()
#  sample_aggregated_segvals_skcm<-formSampleMatrixFromRawGDCData(tcga_files = tcga_files_skcm,format = "TCGA",parallel = T)
#  #proc.time() - ptm
#  saveRDS(sample_aggregated_segvals_skcm,"skcm_sample_matched_input_matrix.rds")

## ----eval=F,echo=T------------------------------------------------------------
#  if(!dir.exists("extracted_prad_data")){dir.create("extracted_prad_data")
#  untar("gdc_download_prad.tar.gz",exdir = "extracted_prad_data")}
#  tcga_files_prad<-list.files(path = "extracted_prad_data",pattern=glob2rx("*.tsv"),recursive=T,full.names = T)
#  print(tcga_files_prad)
#  

## ----eval=F,echo=T------------------------------------------------------------
#  sample_aggregated_segvals_output_full_prad<-formSampleMatrixFromRawGDCData(tcga_files = tcga_files_prad,format = "TCGA",binsize=1e6)
#  saveRDS(sample_aggregated_segvals_output_full_prad,"PRAD_sample_matched_input_matrix.rds")
#  

Try the CNVScope package in your browser

Any scripts or data that you put into this service are public.

CNVScope documentation built on March 31, 2022, 1:07 a.m.