inst/doc/RW5a_matrix.R

## ----loadPackages, echo=FALSE, warning=FALSE, message=FALSE-------------------
library(knitr)
# library(pander)
# suppressPackageStartupMessages(library(ramwas))
# panderOptions("digits", 3)
# opts_chunk$set(fig.width = 6, fig.height = 6)
opts_chunk$set(eval=FALSE)
# setwd('F:/meth')

## ----prereq, eval=FALSE-------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install(c(
#      "minfi",
#      "IlluminaHumanMethylation450kmanifest",
#      "IlluminaHumanMethylationEPICmanifest",
#      "wateRmelon",
#      "readxl",
#      "RPMM",
#      "FlowSorted.Blood.450k",
#      "FlowSorted.Blood.EPIC"),
#      update = TRUE, ask = FALSE, quiet = TRUE)

## ----downloadUnTAR------------------------------------------------------------
#  download.file(
#      url = 'https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE42861&format=file',
#      destfile = 'GSE42861_RAW.tar',
#      quiet = TRUE,
#      mode = 'wb')
#  untar('GSE42861_RAW.tar')

## ----load---------------------------------------------------------------------
#  library(minfi)
#  download.file(  url = "http://shabal.in/RaMWAS/GSE42861_sampleSheet.csv",
#                  destfile = "GSE42861_sampleSheet.csv",
#                  destfilemode = "wb")
#  targets = read.csv( file = "GSE42861_sampleSheet.csv",
#                      stringsAsFactors = FALSE)
#  rgSet = read.metharray.exp(
#                      targets = targets,
#                      extended = TRUE,
#                      verbose = TRUE)

## ----probes-------------------------------------------------------------------
#  if( "IlluminaHumanMethylation450k" %in% rgSet@annotation ){
#      host = "http://www.sickkids.ca/MS-Office-Files/Research/Weksberg%20Lab/"
#      files = c(
#          i450k_ns.xlsx = "48639-non-specific-probes-Illumina450k.xlsx",
#          i450k_pl.xlsx = "48640-polymorphic-CpGs-Illumina450k.xlsx")
#  
#      for( i in seq_along(files) )
#          download.file(
#              url = paste0(host, files[i]),
#              destfile = names(files)[i],
#              mode = "wb",
#              quiet = TRUE)
#  
#      library(readxl)
#      ex1 = read_excel("i450k_ns.xlsx", sheet = 1)
#      ex2 = read_excel("i450k_pl.xlsx", sheet = 1)
#      ex3 = read_excel("i450k_pl.xlsx", sheet = 2)
#  
#      exclude.snp = unique(c(
#                  ex1$TargetID,
#                  ex2$PROBE,
#                  ex3$PROBE[ (ex3$BASE_FROM_SBE < 10) & (ex3$AF > 0.01)]))
#      rm(host, files, i, ex1, ex2, ex3)
#  } else {
#      host = "https://static-content.springer.com/esm/art%3A10.1186%2Fs13059-016-1066-1/MediaObjects/"
#      files = c(
#          S1_cross_reactive.csv     = '13059_2016_1066_MOESM1_ESM.csv',
#          S4_snp_cpg.csv            = '13059_2016_1066_MOESM4_ESM.csv',
#          S5_snp_base_extension.csv = '13059_2016_1066_MOESM5_ESM.csv',
#          S6_snp_body.csv           = '13059_2016_1066_MOESM6_ESM.csv')
#  
#      for( i in seq_along(files) )
#          download.file(
#              url = paste0(host, files[i]),
#              destfile = names(files)[i],
#              mode = "wb",
#              quiet = TRUE)
#  
#      snpcpgs1 = read.csv('S1_cross_reactive.csv', stringsAsFactors = FALSE)
#      snpcpgs4 = read.csv('S4_snp_cpg.csv', stringsAsFactors = FALSE)
#      snpcpgs5 = read.csv('S5_snp_base_extension.csv', stringsAsFactors = FALSE)
#      snpcpgs6 = read.csv('S6_snp_body.csv', stringsAsFactors = FALSE)
#  
#      exclude.snp = unique(c(
#          snpcpgs1$X,
#          snpcpgs4$PROBE,
#          snpcpgs5$PROBE,
#          snpcpgs6$PROBE[
#              pmax(snpcpgs6$VARIANT_START - snpcpgs6$MAPINFO,
#                   snpcpgs6$MAPINFO - snpcpgs6$VARIANT_END) < 10]))
#      rm(host, files, i, snpcpgs1, snpcpgs4, snpcpgs5, snpcpgs6)
#  }

## -----------------------------------------------------------------------------
#  lb = getNBeads(rgSet) < 3
#  pi1 = getProbeInfo(rgSet, type = "I")
#  pi2 = getProbeInfo(rgSet, type = "II")
#  ex1 = pi1$Name[rowMeans(lb[pi1$AddressA,] | lb[pi1$AddressB,]) > 0.01]
#  ex2 = pi2$Name[rowMeans(lb[pi2$AddressA,]) > 0.01]
#  exclude.bds = unique(c(ex1, ex2))
#  rm(lb, pi1, pi2, ex1, ex2)

## ----pv-----------------------------------------------------------------------
#  hp = detectionP(rgSet) > 0.01
#  exclude.hpv = rownames(hp)[rowMeans(hp) > 0.01]
#  keep.samples = colMeans(hp) < 0.01
#  rm(hp)

## ----exclude------------------------------------------------------------------
#  rgSet = subsetByLoci(
#              rgSet = rgSet[,keep.samples],
#              excludeLoci = c(exclude.snp, exclude.bds, exclude.hpv))

## ----beta---------------------------------------------------------------------
#  rgSetRaw = fixMethOutliers(preprocessRaw(rgSet))
#  # beta = BMIQ(rgSetRaw)
#  beta = getBeta(rgSetRaw)

## ----save---------------------------------------------------------------------
#  dir.create('rw', showWarnings = FALSE)
#  
#  rng = granges(mapToGenome(rgSet))
#  chr = seqnames(rng)
#  
#  # Save CpG locations
#  library(filematrix)
#  locs = cbind(chr = as.integer(chr), position = start(rng))
#  fmloc = fm.create.from.matrix(
#          filenamebase = paste0("rw/CpG_locations"),
#          mat = locs,
#          size = 4)
#  close(fmloc)
#  writeLines(con = 'rw/CpG_chromosome_names.txt', text = levels(chr))
#  
#  # Save estimates
#  fm = fm.create.from.matrix(
#          filenamebase = paste0("rw/Coverage"),
#          mat = t(beta))
#  close(fm)

## ----pca1---------------------------------------------------------------------
#  controlType = unique(getManifest(rgSet)@data$TypeControl$Type)
#  controlSet = getControlAddress(rgSet, controlType = controlType)
#  probeData = rbind(getRed(rgSet)[controlSet,], getGreen(rgSet)[controlSet,])

## ----pca2---------------------------------------------------------------------
#  data = probeData - rowMeans(probeData)
#  covmat = crossprod(data)
#  eig = eigen(covmat)

## ----val----------------------------------------------------------------------
#  library(ramwas)
#  plotPCvalues(eig$values, n = 20)
#  plotPCvectors(eig$vectors[,1], i = 1, col = 'blue')

## ----pplotpc, echo=FALSE------------------------------------------------------
#  png('PCval.png', 800, 800, pointsize = 28)
#  plotPCvalues(eig$values, n = 20)
#  dev.off()
#  png('PCvec.png', 800, 800, pointsize = 28)
#  plotPCvectors(eig$vectors[,1], i = 1, col = 'blue')
#  dev.off()

## ----cov.pca------------------------------------------------------------------
#  nPCs = 2
#  covariates.pca = eig$vectors[,seq_len(nPCs)]
#  colnames(covariates.pca) = paste0('PC',seq_len(nPCs))
#  rm(probeData, data, covmat, eig, nPCs)

## ----cell---------------------------------------------------------------------
#  covariates.cel = estimateCellCounts(
#                      rgSet = rgSet,
#                      compositeCellType = "Blood",
#                      cellTypes = c("CD8T", "CD4T", "NK",
#                                    "Bcell", "Mono", "Gran"),
#                      meanPlot = FALSE)

## ----umm----------------------------------------------------------------------
#  covariates.umm = getQC(rgSetRaw)

## ----pheno--------------------------------------------------------------------
#  rownames(targets) = targets$Basename;
#  targets$xSlide = paste0('x',targets$Slide) # Force Slide to be categorical
#  covariates.phe = targets[
#              rownames(covariates.umm),
#              c("xSlide", "Array", "caseStatus", "age", "sex", "smokingStatus")]

## ----param1-------------------------------------------------------------------
#  covariates = data.frame(
#                  samples = rownames(covariates.umm),
#                  covariates.umm,
#                  covariates.pca,
#                  covariates.cel,
#                  covariates.phe)
#  
#  library(ramwas)
#  param = ramwasParameters(
#      dircoveragenorm = 'rw',
#      covariates = covariates,
#      modelcovariates = NULL,
#      modeloutcome = "caseStatus",
#      modelPCs = 0)

## ----pcaNULL------------------------------------------------------------------
#  param$modelcovariates = NULL
#  param$modelPCs = 0
#  ramwas4PCA(param)
#  ramwas5MWAS(param)
#  qqPlotFast(getMWAS(param)$`p-value`)
#  title('QQ-plot\nNo covariates, no PCs')

## ----pcaFULL------------------------------------------------------------------
#  param$modelcovariates = c(
#      "age", "sex", "Array", "xSlide",
#      "mMed", "uMed",
#      "PC1", "PC2",
#      "CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran")
#  param$modelPCs = 2
#  ramwas4PCA(param)
#  ramwas5MWAS(param)
#  qqPlotFast(getMWAS(param)$`p-value`)
#  title('QQ-plot\n13 covariates and 2 PC')

## ----plotmwas, echo=FALSE-----------------------------------------------------
#  png('QQnull.png', 800, 800, pointsize = 28)
#  param$modelcovariates = NULL
#  param$modelPCs = 0
#  mwas = getMWAS(param)
#  qqPlotFast(mwas$`p-value`)
#  title('QQ-plot\nNo covariates, no PCs')
#  dev.off()
#  
#  png('QQfull.png', 800, 800, pointsize = 28)
#  param$modelcovariates = c(
#      "age", "sex", "Array", "xSlide",
#      "mMed", "uMed",
#      "PC1", "PC2",
#      "CD8T", "CD4T", "NK", "Bcell", "Mono", "Gran")
#  param$modelPCs = 2
#  mwas = getMWAS(param)
#  qqPlotFast(mwas$`p-value`)
#  title('QQ-plot\n13 covariates and 2 PCs')
#  dev.off()

Try the ramwas package in your browser

Any scripts or data that you put into this service are public.

ramwas documentation built on Nov. 8, 2020, 8:24 p.m.