inst/doc/RW4_SNPs.R

## ----loadPackages, echo=FALSE, warning=FALSE, message=FALSE-------------------
library(knitr)
library(pander)
suppressPackageStartupMessages(library(ramwas))
panderOptions("digits", 3)
opts_chunk$set(fig.width = 6, fig.height = 6)
# opts_chunk$set(eval=FALSE)
dr = "D:/temp/"

## ----generateData-------------------------------------------------------------
library(ramwas)

# work in a temporary directory
dr = paste0(tempdir(), "/simulated_matrix_data")
dir.create(dr, showWarnings = FALSE)
cat(dr,"\n")

## ----dims, eval=TRUE----------------------------------------------------------
nsamples = 200
nvariables = 100000

## ----setseed1, echo=FALSE-----------------------------------------------------
set.seed(18090212)

## ----genCovar-----------------------------------------------------------------
covariates = data.frame(
    sample = paste0("Sample_",seq_len(nsamples)),
    sex = seq_len(nsamples) %% 2,
    age = runif(nsamples, min = 20, max = 80),
    batch = paste0("batch",(seq_len(nsamples) %% 3))
)
pander(head(covariates))

## ----setseed2, echo=FALSE-----------------------------------------------------
set.seed(18090212)

## ----genLocs------------------------------------------------------------------
temp = cumsum(sample(20e7 / nvariables, nvariables, replace = TRUE) + 0)
chr      = as.integer(temp %/% 1e7) + 1L
position = as.integer(temp %% 1e7)

locmat = cbind(chr = chr, position = position)
chrnames = paste0("chr", 1:10)
pander(head(locmat))

## ----locSave------------------------------------------------------------------
fmloc = fm.create.from.matrix(
            filenamebase = paste0(dr, "/CpG_locations"),
            mat = locmat)
close(fmloc)
writeLines(con = paste0(dr, "/CpG_chromosome_names.txt"), text = chrnames)

## ----setseed3, echo=FALSE-----------------------------------------------------
set.seed(18090212)

## ----fillDataMat--------------------------------------------------------------
fmm = fm.create(paste0(dr,"/Coverage"), nrow = nsamples, ncol = nvariables)
fms = fm.create(paste0(dr,"/SNPs"), nrow = nsamples, ncol = nvariables,
                size = 1, type = "integer")

# Row names of the matrices are set to sample names
rownames(fmm) = as.character(covariates$sample)
rownames(fms) = as.character(covariates$sample)

# The matrices are filled, 2000 variables at a time
byrows = 2000
for( i in seq_len(nvariables/byrows) ){ # i=1
    ind = (1:byrows) + byrows*(i-1)

    snps = rbinom(n = byrows * nsamples, size = 2, prob = 0.2)
    dim(snps) = c(nsamples, byrows)
    fms[,ind] = snps

    slice = double(nsamples*byrows)
    dim(slice) = c(nsamples, byrows)
    slice[,  1:225] = slice[,  1:225] + covariates$sex / 50 / sd(covariates$sex)
    slice[,101:116] = slice[,101:116] + covariates$age / 16 / sd(covariates$age)
    slice = slice +
                ((as.integer(factor(covariates$batch))+i) %% 3) / 200 +
                snps / 1.5 +
                runif(nsamples*byrows) / 2
    fmm[,ind] = slice
}
close(fms)
close(fmm)

## ----paramMWAS, warning=FALSE, message=FALSE----------------------------------
param = ramwasParameters(
    dircoveragenorm = dr,
    covariates = covariates,
    modelcovariates = "batch",
    modeloutcome = "sex",
    toppvthreshold = 20,
    fileSNPs = "SNPs"
)

## ----threads, echo=FALSE------------------------------------------------------
# Bioconductor requires limit of 2 parallel jobs
param$cputhreads = 2

## ----SNPs, message=FALSE, warning=FALSE---------------------------------------
ramwasSNPs(param)

## ----plotQQ2, echo=FALSE, warning=FALSE, message=FALSE------------------------
pfull = parameterPreprocess(param)
mwas = getMWAS(pfull$dirSNPs)
qqPlotFast(mwas$`p-value`)
title("QQ-plot for CpG-SNP analysis")

## ----MWAS, message=FALSE, warning=FALSE---------------------------------------
ramwas5MWAS(param)

## ----plotQQ1, echo=FALSE, warning=FALSE, message=FALSE------------------------
mwas = getMWAS(param)
qqPlotFast(mwas$`p-value`)
title(pfull$qqplottitle)

## ----topPvSNPs----------------------------------------------------------------
# Get the directory with testing results
toptbl = read.table(
                paste0(pfull$dirSNPs, "/Top_tests.txt"),
                header = TRUE,
                sep = "\t")
pander(head(toptbl,10))

## ----topPvMWAS----------------------------------------------------------------
pfull = parameterPreprocess(param)
toptbl = read.table(
                paste0(pfull$dirmwas, "/Top_tests.txt"),
                header = TRUE,
                sep = "\t")
pander(head(toptbl,10))

Try the ramwas package in your browser

Any scripts or data that you put into this service are public.

ramwas documentation built on Nov. 8, 2020, 8:24 p.m.