# data-raw/ExampleVcfDatasets.R

### Build the Hellman et al. example dataset: for each patient, the number of
### mutations in a simulated FM1 panel and in the whole exome, together with
### the clinical response. The result is stored as package data.

### Define input directory and genome assembly
# Directory holding the raw input files inside the installed TMBleR package.
# The trailing slash is kept because file names are appended with paste0().
inDir <- file.path(find.package("TMBleR"), "data-raw/")
# Genome assembly name (not used by this section)
assembly <- "hg19"

### Read the dataset by Hellman et al., generated by whole exome sequencing
infile <- paste0(inDir, "Hellman_CancerCell_2018__Mutations.txt")
dataset <- read.table(file = infile, sep = "\t", header = TRUE)
# NOTE: boolean-like columns in this file (FM1, nonsynonymous, Responder) are
# compared against the strings "VERO"/"FALSO" (Italian for true/false).

### Simulated panel: keep only the variants flagged as falling in the regions
### targeted by the FM1 panel
Panel <- dataset[dataset$FM1 == "VERO", ]
Panel$PatientID <- factor(Panel$PatientID)
Panel$ClinicalResponse <- as.factor(Panel$Responder)
# Drop column 24 by position.
# NOTE(review): the column's identity is not visible here -- confirm, and
# consider dropping it by name instead.
Panel <- as.data.frame(Panel[, -24])
# One row per patient: number of panel variants plus the response label(s)
PanelNumMuts <- Panel %>%
    dplyr::select(PatientID, ClinicalResponse) %>%
    dplyr::group_by(PatientID) %>%
    dplyr::summarise(
        Panel.NumMuts = length(ClinicalResponse),
        ClinicalResponse = levels(factor(ClinicalResponse))
    )
PanelNumMuts$ClinicalResponse <- as.factor(PanelNumMuts$ClinicalResponse)
# Translate the raw flags into readable response labels
levels(PanelNumMuts$ClinicalResponse)[levels(PanelNumMuts$ClinicalResponse) == "VERO"] <- "responder"
levels(PanelNumMuts$ClinicalResponse)[levels(PanelNumMuts$ClinicalResponse) == "FALSO"] <- "nonresponder"
PanelNumMuts <- as.data.frame(PanelNumMuts)

### Select for WES-based TMB quantification only nonsynonymous variants
# "VERO" marks a true flag (as for FM1 and Responder above), so this keeps
# the rows whose nonsynonymous flag is set.
WES <- dataset[dataset$nonsynonymous == "VERO", ]
WES$PatientID <- factor(WES$PatientID)
WES$ClinicalResponse <- as.factor(WES$Responder)
# Same positional column drop as for the panel subset (see NOTE above)
WES <- as.data.frame(WES[, -24])
# One row per patient: number of WES variants plus the response label(s)
WESNumMuts <- WES %>%
    dplyr::select(PatientID, ClinicalResponse) %>%
    dplyr::group_by(PatientID) %>%
    dplyr::summarise(
        WES.NumMuts = length(ClinicalResponse),
        ClinicalResponse = levels(factor(ClinicalResponse))
    )
WESNumMuts$ClinicalResponse <- as.factor(WESNumMuts$ClinicalResponse)
levels(WESNumMuts$ClinicalResponse)[levels(WESNumMuts$ClinicalResponse) == "VERO"] <- "responder"
levels(WESNumMuts$ClinicalResponse)[levels(WESNumMuts$ClinicalResponse) == "FALSO"] <- "nonresponder"
WESNumMuts <- as.data.frame(WESNumMuts)

### Merge simulated panel and WES data
# Inner join: only patients present in both tables with the same response
# label are kept.
Hellman_SimulatedFM1Panel_WES <- merge(
    PanelNumMuts,
    WESNumMuts,
    by = c("PatientID", "ClinicalResponse"),
    all = FALSE
)
usethis::use_data(Hellman_SimulatedFM1Panel_WES, internal = FALSE, compress = "gzip")


# Genome assembly name passed to readVcfFiles()
assembly <- "hg19"
# Directory holding the raw example files inside the installed TMBleR package
# (trailing slash kept because file names are appended with paste0())
inDir <- file.path(find.package("TMBleR"), "data-raw/")
# Named list of full paths to the example VCF files, one entry per sample
vcfFiles <- lapply(
    list(
        Sample1 = "Sample1_ExampleWES_chr7.vcf",
        Sample2 = "Sample2_ExampleWES_chr7.vcf",
        Sample3 = "Sample3_ExampleWES_chr7.vcf",
        Sample4 = "Sample4_ExampleWES_chr7.vcf"
    ),
    function(fileName) paste0(inDir, fileName)
)
# Read the VCF files and store the result as package data
ExampleWESvcfs <- readVcfFiles(vcfFiles = vcfFiles, assembly)
usethis::use_data(ExampleWESvcfs, internal = FALSE, compress = "gzip")
# acc-bioinfo/TMBleR documentation built on Dec. 18, 2021, 10:21 p.m.