inst/doc/mega2rtutorial.R

## ----echo=FALSE,message=FALSE,warning=FALSE-----------------------------------
library(knitr)
# Set so that long lines in R will be wrapped:
opts_chunk$set(tidy.opts=list(width.cutoff=80),tidy=TRUE)

## ----mega2rtutDump, message=FALSE---------------------------------------------
library(Mega2R)
dump_mega2rtutorial_data()

## ----mega2rtutDumpls, message=FALSE-------------------------------------------
list.files(where_mega2rtutorial_data())

## ----mega2rtutClean,eval=FALSE------------------------------------------------
#  clean_mega2rtutorial_data()

## ----install,eval=FALSE-------------------------------------------------------
#  install.packages("Mega2R")

## ----seqsimrdb,eval=FALSE-----------------------------------------------------
#  library(Mega2R)
#  # Before issuing the next command, make sure you have issued
#  # this command `dump_mega2rtutorial_data()` first
#  # as instructed in the "Tutorial Data' section above.
#  db = file.path(where_mega2rtutorial_data(), "seqsimr.db")
#  ENV = read.Mega2DB(db, verbose = TRUE)

## ----seqsimrdb2---------------------------------------------------------------
# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
db = file.path(where_mega2rtutorial_data(), "seqsimr.db")
ENV = read.Mega2DB(db, verbose = TRUE)

## ----ls-----------------------------------------------------------------------
ls(ENV)

## ----showMega2ENV-------------------------------------------------------------
showMega2ENV() 

## ----str----------------------------------------------------------------------
str(ENV$locus_table)

## ----setrange0----------------------------------------------------------------
    dim(ENV$refRanges)
    head(ENV$refRanges)

## ----setrange1----------------------------------------------------------------
    ranges = matrix(c(1, 2240000, 2245000,
                      1, 2245000, 2250000,
                      1, 3760000, 3761000,
                      1, 3761000, 3762000,
                      1, 3762000, 3763000,
                      1, 3763000, 3764000,
                      1, 3764000, 3765000,
                      1, 3765000, 3763760,
                      1, 3763760, 3767000,
                      1, 3767000, 3768000,
                      1, 3768000, 3769000,
                      1, 3769000, 3770000),
                      ncol = 3, nrow = 12, byrow = TRUE)

    setRanges(ranges, 1:3)

    dim(ENV$refRanges)
    head(ENV$refRanges)

## ----setrange2----------------------------------------------------------------
    ranges = matrix(c(1, 2240000, 2245000,
                      1, 2245000, 2250000,
                      1, 3760000, 3761000,
                      1, 3761000, 3762000,
                      1, 3762000, 3763000,
                      1, 3763000, 3764000,
                      1, 3764000, 3765000,
                      1, 3765000, 3763760,
                      1, 3763760, 3767000,
                      1, 3767000, 3768000,
                      1, 3768000, 3769000,
                      1, 3769000, 3770000),
                      ncol = 3, nrow = 12, byrow = TRUE)
    ranges = data.frame(ranges)
    ranges$name = LETTERS[1:12]
    names(ranges) = c("chr", "start", "end", "name")

    setRanges(ranges, 1:4)
    dim(ENV$refRanges)
    head(ENV$refRanges)

## ----callback-----------------------------------------------------------------
    show = function(m, r, e) {
       print("rrrrrrrrrr")
       print(r)
       print("mmmmmmmmmm")
       print(m)
       print("g6g6g6g6g6")
       print(head(getgenotypes(m, envir = e)))
    }

## ----applyfntoranges1,eval=TRUE-----------------------------------------------
    # apply function "show" to all the ranges
    # ranges
    ENV$verbose = FALSE
    applyFnToRanges(show)

## ----applyfntoranges2,eval=TRUE-----------------------------------------------
    # apply function "show" to all genotypes on chromosomes 1 
    # ranges
    applyFnToRanges(show, 
                    ranges_arg =
                    matrix(c(1, 4000000, 5000000, "range4m",
                             1, 5000000, 6000000, "range5m",
                             1, 6000000, 7000000, "range6m",
                             1, 7000000, 8000000, "range7m",
                             1, 8000000, 9000000, "range8m",
                             1, 9000000,10000000, "range9m"),
                            ncol = 4, nrow = 6, byrow = TRUE),
                    indices_arg = 1:4)

## ----setannotations0----------------------------------------------------------
    ENV$txdb
    ENV$entrezGene

## ----applyfntogenes0,eval=TRUE------------------------------------------------
if ((require("org.Hs.eg.db")  &
     require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
  # apply function "show" to all transcripts on genes ELL2 and CARD15
  applyFnToGenes(show, genes_arg = c("CEP104"))
}

## ----applyfntogenes19,eval=TRUE-----------------------------------------------
if ((require("org.Hs.eg.db")  &
     require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
  setAnnotations("TxDb.Hsapiens.UCSC.hg19.knownGene", "org.Hs.eg.db")
  applyFnToGenes(show, genes_arg = c("CEP104"))
}

## ----applyfntogenes,eval=TRUE-------------------------------------------------
    # apply function "show" to all genotypes on chromosomes 1 for two base
    # pair ranges
    applyFnToGenes(show, ranges_arg = 
                            matrix(c(1,  5000000, 10000000,
                                     1, 10000000, 15000000),
                                    ncol = 3, nrow = 2, byrow = TRUE))

    # apply function "show" to all genotypes for first marker 
    # in each chromosome (We only have data for chromosome 1.)
    # NOTE: Since we are using an arbitrary collection of markers, the
    # range is not available.
    applyFnToGenes(show, markers_arg = ENV$markers[! duplicated(ENV$markers$chromosome), 3])

    # apply function "show" to all genotypes on chromosomes 24 and 26.
    # remember our example database is only chr 1
    applyFnToGenes(show, chrs_arg=c(24, 26))

## ----showstat,eval=TRUE-------------------------------------------------------
show.stat = function(m, r, e, fn) {
      print(r) 
# collect genotypes for the set of markers "m"
      mm = getgenotypes(m, envir=e) 
# apply xxx.test of trait vs marker (accumulating samples)
      pv = apply(mm, 2, fn)
      names(pv) = m$MarkerName
      print(pv)
}

show2 = function(m, r, e) {
    f = function(x) {
          tryCatch(fisher.test(table(e$fam$trait, x)),
                   error = function(e) {list(p.value = NA)}) $p.value }

    show.stat(m, r, e, f)
}

show3 = function(m, r, e) {
    f = function(x) {
          tryCatch(chisq.test(table(e$fam$trait, x)),
                   error = function(e) {list(p.value = NA)}) $p.value }

    show.stat(m, r, e, f)
}

## ----xshow2,eval=TRUE---------------------------------------------------------
if ((require("org.Hs.eg.db")  &
     require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
  applyFnToGenes(show2, genes_arg = c("CEP104"))
}

## ----xshow3,eval=TRUE---------------------------------------------------------
if ((require("org.Hs.eg.db")  &
     require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
  applyFnToGenes(show3, genes_arg = c("CEP104"))
}

## ----getgenotypes,eval=TRUE---------------------------------------------------
    genotype = getgenotypes(ENV$markers[1:10,])
    dim(genotype)
    head(genotype)

## ----getgenotypesraw,eval=TRUE------------------------------------------------
# two ints in upper/lower half integer representing allele 
    raw = getgenotypesraw(ENV$markers[1:10,])
    dim(raw)
    head(raw)

## ----init_pedgene,message=FALSE-----------------------------------------------
# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
db = file.path(where_mega2rtutorial_data(), "seqsimr.db")
ENV = init_pedgene(db)

## ----mega2pedgene-------------------------------------------------------------
ENV$verbose=TRUE
Mega2pedgene(gs=50:60)

## ----FnToRanges,eval=FALSE----------------------------------------------------
#  # we will skip this line for the Rmd document production because it takes too long
#  applyFnToRanges(DOpedgene, ENV$refRanges, ENV$refIndices, envir = ENV)

## ----eval=FALSE---------------------------------------------------------------
#  if ((require("org.Hs.eg.db")  &
#       require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
#    applyFnToGenes(
#      DOpedgene,
#      genes_arg = c('DISP1', 'KIF26B', 'AK5', 'ST7L'),
#      envir = ENV
#    )
#  }

## ----FnToGenes,eval=TRUE------------------------------------------------------
if ((require("org.Hs.eg.db")  &
     require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
  applyFnToGenes(DOpedgene,
                 genes_arg = c('DISP1', 'AK5'),
                 envir = ENV)
}

## ----FnToGenesall,eval=FALSE--------------------------------------------------
#  if ((require("org.Hs.eg.db")  &
#       require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
#    # we will skip this line for the Rmd document production because it takes too long
#    applyFnToGenes(DOpedgene, genes_arg = '*', envir = ENV)
#  }

## ----init_SKAT,message=FALSE--------------------------------------------------
# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
db = file.path(where_mega2rtutorial_data(), "seqsimr.db")
ENV = init_SKAT(db, verbose = F, allMarkers = F)

## ----init_SKAT2,eval=FALSE----------------------------------------------------
#  # Before issuing the next command, make sure you have issued
#  # this command `dump_mega2rtutorial_data()` first
#  # as instructed in the "Tutorial Data' section above.
#  db = file.path(where_mega2rtutorial_data(), "seqsimr.db")
#  ENV = init_SKAT(db, verbose = T, allMarkers = F)

## ----FnToRangesSKAT,eval=TRUE-------------------------------------------------
ENV$verbose = FALSE
ENV$SKAT_results = ENV$SKAT_results[0, ]
Mega2SKAT(ENV$phe[, 3] - 1 ~ 1, "D", kernel = "linear.weighted", weights.beta=c(0.5,0.5))

## ----FnToRangesa,eval=TRUE----------------------------------------------------
print(ENV$SKAT_results)

## ----FnToRange2s,eval=FALSE---------------------------------------------------
#  # we will skip this line for the Vignette document production because it takes too long
#  ENV$verbose = FALSE
#  ENV$SKAT_results = ENV$SKAT_results[0, ]
#  Mega2SKAT(ENV$phe[, 3] - 1 ~ 1, "D", kernel = "linear.weighted", weights.beta=c(0.5,0.5), gs=1:nrow(ENV$refRanges))
#  print(ENV$SKAT_results)

## ----FnToGenesSKAT,eval=TRUE--------------------------------------------------
ENV$SKAT_results = ENV$SKAT_results[0, ]
if ((require("org.Hs.eg.db")  &
     require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
  Mega2SKAT(
    ENV$phe[, 3] - 1 ~ 1,
    "D",
    kernel = "linear.weighted",
    weights.beta = c(0.5, 0.5),
    genes = c('DISP1', 'AK5', 'KIF26B', 'ST7L'),
    envir = ENV
  )
}

## ----FnToGenesa,eval=TRUE-----------------------------------------------------
print(ENV$SKAT_results)

## ----FnToGene2s,eval=TRUE-----------------------------------------------------
ENV$verbose = FALSE
ENV$SKAT_results = ENV$SKAT_results[0, ]
if ((require("org.Hs.eg.db")  &
     require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
  Mega2SKAT(
    ENV$phe[, 3] - 1 ~ 1,
    "D",
    kernel = "linear.weighted",
    weights.beta = c(0.5, 0.5),
    genes = '*',
    envir = ENV
  )
}

## ----FnToGene2a,eval=TRUE-----------------------------------------------------
print(ENV$SKAT_results)

## ----init_famSKAT,message=FALSE-----------------------------------------------
# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
db = file.path(where_mega2rtutorial_data(), "seqsimr.db")
ENV = init_famSKATRC(db, verbose = F)

## ----init_famSKATb------------------------------------------------------------
# The special hack below reduces the samples to 20% of the original, so the run will
# finish in reasonable time. There are 20 different pedigrees.
setfam(ENV$fam[(ENV$fam$pedigree_link %in% 0:3),], ENV)

ENV$phe = mkphenotype(ENV)
ENV$phe[ENV$phe == 0] = NA

## ----init_famSKAT2,eval=FALSE-------------------------------------------------
#  # Before issuing the next command, make sure you have issued
#  # this command `dump_mega2rtutorial_data()` first
#  # as instructed in the "Tutorial Data' section above.
#  db = file.path(where_mega2rtutorial_data(), "seqsimr.db")
#  ENV = init_famSKATRC(db, verbose = T)

## ----FnToRangesfamSKAT,eval=TRUE----------------------------------------------
ENV$verbose = FALSE
Mega2famSKATRC(pheno=3,gs=1:60)

## ----FnToRangesfama,eval=TRUE-------------------------------------------------
print(ENV$famSKATRC_results)

## ----FnToRange2fams,eval=FALSE------------------------------------------------
#  # we will skip this line for the Vignette document production because it takes too long
#  ENV$verbose = FALSE
#  Mega2famSKATRC(pheno=3, gs=1:nrow(ENV$refRanges))
#  print(ENV$famSKATRC_results)

## ----FnToGenesfamSKAT,eval=TRUE-----------------------------------------------
ENV$verbose = TRUE
if ((require("org.Hs.eg.db")  &
     require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
  Mega2famSKATRC(
    pheno = 3,
    genes = c('DISP1', 'AK5', 'KIF26B', 'ST7L'),
    envir = ENV
  )
  print(ENV$famSKATRC_results)
}

## ----FnToGene2fams,eval=FALSE-------------------------------------------------
#  ENV$verbose = FALSE
#  if ((require("org.Hs.eg.db")  &
#       require("TxDb.Hsapiens.UCSC.hg19.knownGene")) == TRUE) {
#    Mega2famSKATRC(pheno = 3,
#                   genes = '*',
#                   envir = ENV)
#  }
#  print(ENV$famSKATRC_results)

## ----mega2vcflib--------------------------------------------------------------
# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
vcfdir = file.path(where_mega2rtutorial_data(), "vcfr")
if (! dir.exists(vcfdir)) dir.create(vcfdir)

## ----mega2vcf-----------------------------------------------------------------
# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
vcffile = file.path(where_mega2rtutorial_data(), "vcfr", "vcf.01")
Mega2VCF(vcffile, ENV$markers[ENV$markers$chromosome==1 ,])

## ----lsvcf--------------------------------------------------------------------
# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
vcfdir = file.path(where_mega2rtutorial_data(), "vcfr")
list.files(vcfdir)

## ----seqsimgwaa, eval=FALSE---------------------------------------------------
#  require("GenABEL")
#  # Before issuing the next command, make sure you have issued
#  # this command `dump_mega2rtutorial_data()` first
#  # as instructed in the "Tutorial Data' section above.
#  db = file.path(where_mega2rtutorial_data(), "seqsimr.db")
#  ENV = read.Mega2DB(db)
#  
#  # This line converts the database to a gwaa.data-class object. The intermediate
#  # files are in tempdir() and begin with "Mega2GenABEL"
#  seqsimgwaa = Mega2GenABEL()

## ----strseqsimgwaa, eval=FALSE------------------------------------------------
#  str(seqsimgwaa)

## ----mega2gds-----------------------------------------------------------------
# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
db = file.path(where_mega2rtutorial_data(), "seqsimr.db")
ENV = read.Mega2DB(db)

## ----mega2gdsfx1--------------------------------------------------------------
# NOTE: the gds file to be created must be closed with the function below,
# or by using an on.exit(closefn.gds(<name>))
showfile.gds(closeall=T, verbose=F)

# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
gdsfile = file.path(where_mega2rtutorial_data(), "foo.gds")
gdsn = Mega2gdsfmt(gdsfile, ENV$markers[ENV$markers$chromosome==1 ,], SeqArray=TRUE)

## ----mega2gdspr1--------------------------------------------------------------
print(gdsn)
closefn.gds(gdsn)

## ----mega2gdsfx2,message=FALSE,warning=FALSE----------------------------------
# NOTE: the gds file to be created must be closed with the function below,
# or by using an on.exit(closefn.gds(<name>))
showfile.gds(closeall=T, verbose=F)

## ----mega2gdsfx3,message=FALSE,warning=FALSE----------------------------------
# Before issuing the next command, make sure you have issued
# this command `dump_mega2rtutorial_data()` first
# as instructed in the "Tutorial Data' section above.
gdsfile = file.path(where_mega2rtutorial_data(), "foo.gds")
gdsn = Mega2gdsfmt(gdsfile, ENV$markers[ENV$markers$chromosome==1 ,], SeqArray=FALSE)

## ----mega2gdspr2--------------------------------------------------------------
print(gdsn)
closefn.gds(gdsn)

## ----srdta, eval=FALSE--------------------------------------------------------
#  GotGenABEL = require("GenABEL", quietly=FALSE)
#  if (GotGenABEL) data(srdta) else srdta=NULL

## ----dmpPed,results="hide",warning=FALSE,eval=FALSE---------------------------
#  GotGenABEL = require("GenABEL", quietly=FALSE)
#  
#  # Before issuing the next command, make sure you have issued
#  # this command `dump_mega2rtutorial_data()` first
#  # as instructed in the "Tutorial Data' section above.
#  
#  srdtafile = file.path(where_mega2rtutorial_data(), "srdta")
#  if (GotGenABEL) export.plink(srdta, transpose = FALSE, filebasename = srdtafile,
#               phenotypes = names(srdta@phdata)[-(1:2)])

## ----srdta.db,eval=FALSE------------------------------------------------------
#  GotGenABEL = require("GenABEL", quietly=FALSE)
#  # Before issuing the next command, make sure you have issued
#  # this command `dump_mega2rtutorial_data()` first
#  # as instructed in the "Tutorial Data' section above.
#  sdb = file.path(where_mega2rtutorial_data(), "srdta.db")
#  ENV = read.Mega2DB(sdb)
#  
#  mega = Mega2GenABEL()

## ----strs, eval=FALSE---------------------------------------------------------
#  GotGenABEL = require("GenABEL", quietly=FALSE)
#  str(mega)
#  if (GotGenABEL) str(srdta)

## ----mega2genabeltst, eval=FALSE----------------------------------------------
#  GotGenABEL = require("GenABEL", quietly=FALSE)
#  options(max.print = 30)
#  Mega2GenABELtst(mega_ = mega, gwaa_ = srdta)

## ----fin,eval=TRUE,echo=FALSE-------------------------------------------------
clean_mega2rtutorial_data()

Try the Mega2R package in your browser

Any scripts or data that you put into this service are public.

Mega2R documentation built on May 29, 2024, 1:14 a.m.