inst/doc/gwasurvivr_Introduction.R

## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, cache=FALSE, eval = TRUE)
library(knitr)

## ---- echo=FALSE--------------------------------------------------------------
cols <- c("RSID",
          "CHR",
          "POS",
          "REF",
          "ALT",
          "SAMP_FREQ_ALT",
          "SAMP_MAF",
          "PVALUE",
          "HR",
          "HR_lowerCI",
          "HR_upperCI",
          "COEF",
          "SE.COEF",
          "Z",
          "N",
          "NEVENT")

desc <- c("SNP ID",
          "Chromosome number",
          "Genomic Position (BP)",
          "Reference Allele",
          "Alternate Allele",
          "Alternate Allele frequency in sample being tested",
          "Minor allele frequency in sample being tested",
          "P-value of single SNP or interaction term",
          "Hazard Ratio (HR)",
          "Lower bound 95% CI of HR",
          "Upper bound 95% CI of HR",
          "Estimated coefficient of SNP",
          "Standard error of coefficient estimate",
          "Z-statistic",
          "Number of individuals in sample being tested",
          "Number of events that occurred in sample being tested")

df <- cbind(cols, desc)
colnames(df) <- c("Column", "Description")
kable(df)

## ---- echo=FALSE--------------------------------------------------------------
cols <- c("TYPED",
          "AF", 
          "MAF",
          "R2",
          "ER2")
desc <- c("Imputation status: TRUE (SNP IS TYPED)/FALSE (SNP IS IMPUTED)",
          "Minimac3 output Alternate Allele Frequency",
          "Minimac3 output of Minor Allele Frequency",
          "Imputation R2 score (minimac3 $R^2$)",
          "Minimac3 ouput empirical $R^2$")
df <- cbind(cols, desc)
colnames(df) <- c("Column", "Description")

kable(df)

## ---- echo=FALSE--------------------------------------------------------------
cols <- c("TYPED",
          "RefPanelAF",
          "INFO")
desc <- c("Imputation status: TRUE (SNP IS TYPED)/FALSE (SNP IS IMPUTED)",
          "HRC Reference Panel Allele Frequency",
          "Imputation INFO score from PBWT")
df <- cbind(cols, desc)
colnames(df) <- c("Column", "Description")
kable(df)

## ---- echo=FALSE--------------------------------------------------------------
cols <- c("TYPED",
          "A0",
          "A1", 
          "exp_freq_A1")
desc <- c("`---` is imputed, repeated RSID is typed",
          "Allele coded 0 in IMPUTE2",
          "Allele coded 1 in IMPUTE2",
          "Expected allele frequency of alelle code A1")
df <- cbind(cols, desc)
colnames(df) <- c("Column", "Description")
kable(df)

## ---- eval=FALSE--------------------------------------------------------------
#  devtools::install_github("suchestoncampbelllab/gwasurvivr")

## ---- eval=FALSE--------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("gwasurvivr", version = "devel")

## ---- eval=FALSE--------------------------------------------------------------
#  install.packages(c("ncdf4", "matrixStats", "parallel", "survival"))

## ---- eval=FALSE--------------------------------------------------------------
#  BiocManager::install("GWASTools")
#  BiocManager::install("VariantAnnotation")
#  BiocManager::install("SummarizedExperiment")
#  BiocManager::install("SNPRelate")

## -----------------------------------------------------------------------------
library(gwasurvivr)

## ---- eval=FALSE--------------------------------------------------------------
#  options("gwasurvivr.cores"=4)

## ---- eval=FALSE--------------------------------------------------------------
#  library(parallel)
#  cl <- makeCluster(detectCores())
#  
#  impute2CoxSurv(..., clusterObj=cl)
#  sangerCoxSurv(..., clusterObj=cl)
#  michiganCoxSurv(..., clusterObj=cl)

## ---- message=FALSE-----------------------------------------------------------
vcf.file <- system.file(package="gwasurvivr",
                        "extdata", 
                        "michigan.chr14.dose.vcf.gz")
pheno.fl <- system.file(package="gwasurvivr",
                        "extdata", 
                        "simulated_pheno.txt")
pheno.file <- read.table(pheno.fl,
                         sep=" ", 
                         header=TRUE,
                         stringsAsFactors = FALSE)
head(pheno.file)

# recode sex column and remove first column 
pheno.file$SexFemale <- ifelse(pheno.file$sex=="female", 1L, 0L)
# select only experimental group sample.ids
sample.ids <- pheno.file[pheno.file$group=="experimental",]$ID_2
head(sample.ids)

## ----eval=FALSE---------------------------------------------------------------
#  michiganCoxSurv(vcf.file=vcf.file,
#                  covariate.file=pheno.file,
#                  id.column="ID_2",
#                  sample.ids=sample.ids,
#                  time.to.event="time",
#                  event="event",
#                  covariates=c("age", "SexFemale", "DrugTxYes"),
#                  inter.term=NULL,
#                  print.covs="only",
#                  out.file="michigan_only",
#                  r2.filter=0.3,
#                  maf.filter=0.005,
#                  chunk.size=100,
#                  verbose=TRUE,
#                  clusterObj=NULL)

## ---- echo=FALSE--------------------------------------------------------------
michiganCoxSurv(vcf.file=vcf.file,
                covariate.file=pheno.file,
                id.column="ID_2",
                sample.ids=sample.ids,
                time.to.event="time",
                event="event",
                covariates=c("age", "SexFemale", "DrugTxYes"),
                inter.term=NULL,
                print.covs="only",
                out.file=tempfile("michigan_only"),
                r2.filter=0.3,
                maf.filter=0.005,
                chunk.size=100,
                verbose=TRUE,
                clusterObj=NULL)

## ---- message=FALSE, eval=FALSE-----------------------------------------------
#  michigan_only <- read.table("michigan_only.coxph", sep="\t", header=TRUE, stringsAsFactors = FALSE)

## ---- message=FALSE, echo=FALSE-----------------------------------------------
michigan_only <- read.table(dir(tempdir(), pattern="^michigan_only.*\\.coxph$", full.names = TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE)

## -----------------------------------------------------------------------------
str(head(michigan_only))

## ----eval=FALSE---------------------------------------------------------------
#  michiganCoxSurv(vcf.file=vcf.file,
#                  covariate.file=pheno.file,
#                  id.column="ID_2",
#                  sample.ids=sample.ids,
#                  time.to.event="time",
#                  event="event",
#                  covariates=c("age", "SexFemale", "DrugTxYes"),
#                  inter.term="DrugTxYes",
#                  print.covs="only",
#                  out.file="michigan_intx_only",
#                  r2.filter=0.3,
#                  maf.filter=0.005,
#                  chunk.size=100,
#                  verbose=FALSE,
#                  clusterObj=NULL)

## ---- echo=FALSE--------------------------------------------------------------
michiganCoxSurv(vcf.file=vcf.file,
                covariate.file=pheno.file,
                id.column="ID_2",
                sample.ids=sample.ids,
                time.to.event="time",
                event="event",
                covariates=c("age", "SexFemale", "DrugTxYes"),
                inter.term="DrugTxYes",
                print.covs="only",
                out.file=tempfile("michigan_intx_only"),
                r2.filter=0.3,
                maf.filter=0.005,
                chunk.size=100,
                verbose=FALSE,
                clusterObj=NULL)

## ---- message=FALSE, eval=FALSE-----------------------------------------------
#  michigan_intx_only <- read.table("michigan_intx_only.coxph", sep="\t", header=TRUE, stringsAsFactors = FALSE)

## ---- message=FALSE, echo=FALSE-----------------------------------------------
michigan_intx_only <- read.table(dir(tempdir(), pattern="^michigan_intx_only.*\\.coxph$", full.names = TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE)

## -----------------------------------------------------------------------------
str(head(michigan_intx_only))

## ---- eval=TRUE---------------------------------------------------------------
vcf.file <- system.file(package="gwasurvivr",
                        "extdata", 
                        "sanger.pbwt_reference_impute.vcf.gz")
pheno.fl <- system.file(package="gwasurvivr",
                        "extdata", 
                        "simulated_pheno.txt")
pheno.file <- read.table(pheno.fl,
                         sep=" ",
                         header=TRUE,
                         stringsAsFactors = FALSE)
head(pheno.file)
# recode sex column and remove first column 
pheno.file$SexFemale <- ifelse(pheno.file$sex=="female", 1L, 0L)
# select only experimental group sample.ids
sample.ids <- pheno.file[pheno.file$group=="experimental",]$ID_2
head(sample.ids)

## ---- eval=FALSE--------------------------------------------------------------
#  sangerCoxSurv(vcf.file=vcf.file,
#                covariate.file=pheno.file,
#                id.column="ID_2",
#                sample.ids=sample.ids,
#                time.to.event="time",
#                event="event",
#                covariates=c("age", "SexFemale", "DrugTxYes"),
#                inter.term=NULL,
#                print.covs="only",
#                out.file="sanger_only",
#                info.filter=0.3,
#                maf.filter=0.005,
#                chunk.size=100,
#                verbose=TRUE,
#                clusterObj=NULL)

## ---- echo=FALSE--------------------------------------------------------------
sangerCoxSurv(vcf.file=vcf.file,
              covariate.file=pheno.file,
              id.column="ID_2",
              sample.ids=sample.ids,
              time.to.event="time",
              event="event",
              covariates=c("age", "SexFemale", "DrugTxYes"),
              inter.term=NULL,
              print.covs="only",
              out.file=tempfile("sanger_only"),
              info.filter=0.3,
              maf.filter=0.005,
              chunk.size=100,
              verbose=TRUE,
              clusterObj=NULL)

## ---- message=FALSE, echo=FALSE-----------------------------------------------
sanger_only <- read.table(dir(tempdir(), pattern="^sanger_only.*\\.coxph$", full.names = TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE)

## -----------------------------------------------------------------------------
str(head(sanger_only))

## ----eval=FALSE---------------------------------------------------------------
#  sangerCoxSurv(vcf.file=vcf.file,
#                covariate.file=pheno.file,
#                id.column="ID_2",
#                sample.ids=sample.ids,
#                time.to.event="time",
#                event="event",
#                covariates=c("age", "SexFemale", "DrugTxYes"),
#                inter.term="DrugTxYes",
#                print.covs="only",
#                out.file="sanger_intx_only",
#                info.filter=0.3,
#                maf.filter=0.005,
#                chunk.size=100,
#                verbose=TRUE,
#                clusterObj=NULL)

## ---- echo=FALSE, eval=FALSE--------------------------------------------------
#  sangerCoxSurv(vcf.file=vcf.file,
#                covariate.file=pheno.file,
#                id.column="ID_2",
#                sample.ids=sample.ids,
#                time.to.event="time",
#                event="event",
#                covariates=c("age", "SexFemale", "DrugTxYes"),
#                inter.term="DrugTxYes",
#                print.covs="only",
#                out.file=tempfile("sanger_intx_only"),
#                info.filter=0.3,
#                maf.filter=0.005,
#                chunk.size=100,
#                verbose=TRUE,
#                clusterObj=NULL)

## ---- message=FALSE-----------------------------------------------------------
impute.file <- system.file(package="gwasurvivr",
                            "extdata",
                            "impute_example.impute2.gz")
sample.file <- system.file(package="gwasurvivr",
                           "extdata", 
                           "impute_example.impute2_sample")
covariate.file <- system.file(package="gwasurvivr", 
                              "extdata",
                              "simulated_pheno.txt")
covariate.file <- read.table(covariate.file, 
                             sep=" ",
                             header=TRUE,
                             stringsAsFactors = FALSE)
covariate.file$SexFemale <- ifelse(covariate.file$sex=="female", 1L, 0L)
sample.ids <- covariate.file[covariate.file$group=="experimental",]$ID_2

## ---- eval=FALSE--------------------------------------------------------------
#  impute2CoxSurv(impute.file=impute.file,
#                 sample.file=sample.file,
#                 chr=14,
#                 covariate.file=covariate.file,
#                 id.column="ID_2",
#                 sample.ids=sample.ids,
#                 time.to.event="time",
#                 event="event",
#                 covariates=c("age", "SexFemale", "DrugTxYes"),
#                 inter.term=NULL,
#                 print.covs="only",
#                 out.file="impute_example_only",
#                 chunk.size=100,
#                 maf.filter=0.005,
#                 exclude.snps=NULL,
#                 flip.dosage=TRUE,
#                 verbose=TRUE,
#                 clusterObj=NULL)

## ---- echo=FALSE--------------------------------------------------------------
impute2CoxSurv(impute.file=impute.file,
               sample.file=sample.file,
               chr=14,
               covariate.file=covariate.file,
               id.column="ID_2",
               sample.ids=sample.ids,
               time.to.event="time",
               event="event",
               covariates=c("age", "SexFemale", "DrugTxYes"),
               inter.term=NULL,
               print.covs="only",
               out.file=tempfile("impute_example_only"),
               chunk.size=100,
               maf.filter=0.005,
               exclude.snps=NULL,
               flip.dosage=TRUE,
               verbose=TRUE,
               clusterObj=NULL)

## ---- message=FALSE, eval=FALSE-----------------------------------------------
#  impute2_res_only <- read.table("impute_example_only.coxph", sep="\t", header=TRUE, stringsAsFactors = FALSE)
#  str(head(impute2_res_only))

## ---- message=FALSE, echo=FALSE-----------------------------------------------
impute2_res_only <- read.table(dir(tempdir(), pattern="^impute_example_only.*\\.coxph$", full.names=TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE)
str(head(impute2_res_only))

## ---- eval=FALSE--------------------------------------------------------------
#  impute2CoxSurv(impute.file=impute.file,
#                 sample.file=sample.file,
#                 chr=14,
#                 covariate.file=covariate.file,
#                 id.column="ID_2",
#                 sample.ids=sample.ids,
#                 time.to.event="time",
#                 event="event",
#                 covariates=c("age", "SexFemale", "DrugTxYes"),
#                 inter.term="DrugTxYes",
#                 print.covs="only",
#                 out.file="impute_example_intx",
#                 chunk.size=100,
#                 maf.filter=0.005,
#                 flip.dosage=TRUE,
#                 verbose=FALSE,
#                 clusterObj=NULL,
#                 keepGDS=FALSE)

## ---- echo=FALSE--------------------------------------------------------------
impute2CoxSurv(impute.file=impute.file,
               sample.file=sample.file,
               chr=14,
               covariate.file=covariate.file,
               id.column="ID_2",
               sample.ids=sample.ids,
               time.to.event="time",
               event="event",
               covariates=c("age", "SexFemale", "DrugTxYes"),
               inter.term="DrugTxYes",
               print.covs="only",
               out.file=tempfile("impute_example_intx"),
               chunk.size=100,
               maf.filter=0.005,
               flip.dosage=TRUE,
               verbose=FALSE,
               clusterObj=NULL,
               keepGDS=FALSE)

## ---- message=FALSE, eval=FALSE-----------------------------------------------
#  impute2_res_intx <- read.table("impute_example_intx.coxph", sep="\t", header=TRUE, stringsAsFactors = FALSE)
#  str(head(impute2_res_intx))

## ---- message=FALSE, echo=FALSE-----------------------------------------------
impute2_res_intx <- read.table(dir(tempdir(), pattern="^impute_example_intx.*\\.coxph$", full.names=TRUE), sep="\t", header=TRUE, stringsAsFactors = FALSE)
str(head(impute2_res_intx))

## ---- eval=FALSE--------------------------------------------------------------
#  ## mysurvivalscript.R
#  library(gwasurvivr)
#  library(batch)
#  
#  parseCommandArgs(evaluate=TRUE) # this is loaded in the batch package
#  
#  options("gwasurvivr.cores"=4)
#  
#  vcf.file <- system.file(package="gwasurvivr",
#                          "extdata",
#                          vcf.file)
#  pheno.fl <- system.file(package="gwasurvivr",
#                          "extdata",
#                          pheno.file)
#  pheno.file <- read.table(pheno.fl,
#                           sep=" ",
#                           header=TRUE,
#                           stringsAsFactors = FALSE)
#  # recode sex column and remove first column
#  pheno.file$SexFemale <- ifelse(pheno.file$sex=="female", 1L, 0L)
#  # select only experimental group sample.ids
#  sample.ids <- pheno.file[pheno.file$group=="experimental",]$ID_2
#  ## -- unlist the covariates
#  ## (refer below to the shell script as to why we are doing this)
#  covariates <- unlist(sapply(covariates, strsplit, "_", 1, "[["))
#  
#  sangerCoxSurv(vcf.file=vcf.file,
#                covariate.file=pheno.file,
#                id.column="ID_2",
#                sample.ids=sample.ids,
#                time.to.event=time,
#                event=event,
#                covariates=covariates,
#                inter.term=NULL,
#                print.covs="only",
#                out.file=out.file,
#                info.filter=0.3,
#                maf.filter=0.005,
#                chunk.size=100,
#                verbose=TRUE,
#                clusterObj=NULL)

## ---- eval=FALSE--------------------------------------------------------------
#  #!/bin/bash
#  DIRECTORY=/path/to/dir/impute_chr
#  
#  module load R
#  
#  R --script ${DIRECTORY}/survival/code/mysurvivalscript.R -q --args \
#          vcf.file ${DIRECTORY}/sanger.pbwt_reference_impute.vcf.gz \
#          pheno.file ${DIRECTORY}/phenotype_data/simulated_pheno.txt \
#          covariates DrugTxYes_age_SexFemale\
#          time.to.event time \
#          event event \
#          out.file ${DIRECTORY}/survival/results/sanger_example_output

## ---- echo=FALSE--------------------------------------------------------------
sessionInfo()

Try the gwasurvivr package in your browser

Any scripts or data that you put into this service are public.

gwasurvivr documentation built on Nov. 8, 2020, 6:53 p.m.