# CpGassoc.R
# In CpGassoc: Association Between Methylation and a Phenotype of Interest

### R code from vignette source 'CpGassoc.Rnw'

###################################################
### code chunk number 1: CpGassoc.Rnw:118-137
###################################################
#Sample output from CpGassoc 
###NOTE: If you are dealing with large data, do not specify large.data=FALSE. The default option is true
##This will involve partitioning up the data and performing more gc() to clear up space
library(CpGassoc)
data(samplecpg,samplepheno,package="CpGassoc")
results<-cpg.assoc(samplecpg,samplepheno$weight,large.data=FALSE)
results
#Analysis with covariates. There are multiple ways to do this. One can define the
#dataframe prior or do it in the function call or as a function such as ~Cov1+Cov2.
#We will do it in the function call
test<-cpg.assoc(samplecpg,samplepheno$weight,data.frame(samplepheno$Distance,samplepheno$Dose),large.data=FALSE)

#Doing a mixed effects model. This does take more time, so we will do a subset of
#the samplecpg
randtest<-cpg.assoc(samplecpg[1:10,],samplepheno$weight,chip.id=samplepheno$chip,random=TRUE,large.data=FALSE)

#summary function will work on items of class cpg.




###################################################
### code chunk number 2: CpGassoc.Rnw:190-198
###################################################
# Analyse two slices of the sample data separately, then merge the two result
# objects with cpg.combine().
library(CpGassoc)
data(samplecpg, samplepheno, package = "CpGassoc")

# NOTE: If you are dealing with large data, do not specify large.data = FALSE.
# The original note states that the default is TRUE (verify against
# ?cpg.assoc); that mode partitions the data and performs more gc() calls to
# clear up space.
test1 <- cpg.assoc(
  samplecpg[1:100, ],
  samplepheno$weight,
  large.data = FALSE
)
test2 <- cpg.assoc(
  samplecpg[101:200, ],
  samplepheno$weight,
  large.data = FALSE
)

# Combine the two partial analyses and print the merged object.
overall <- cpg.combine(list(test1, test2))
overall


###################################################
### code chunk number 3: CpGassoc.Rnw:282-299
###################################################
# Permutation testing with cpg.perm() on the sample data.
library(CpGassoc)
data(samplecpg, samplepheno, package = "CpGassoc")

# NOTE: If you are dealing with large data, do not specify large.data = FALSE.
# The original note states that the default is TRUE (verify against
# ?cpg.perm); that mode partitions the data and performs more gc() calls to
# clear up space.

# Run 10 permutations with a fixed seed, adjusting for Dose and Distance.
Testperm <- cpg.perm(
  samplecpg,
  samplepheno$weight,
  data.frame(samplepheno$Dose, samplepheno$Distance),
  seed = 2314,
  nperm = 10,
  large.data = FALSE
)
Testperm

# All the contents of the cpg.assoc output are included in the output from
# Testperm. Compare against a direct cpg.assoc() run on the same inputs.
test <- cpg.assoc(
  samplecpg,
  samplepheno$weight,
  data.frame(samplepheno$Distance, samplepheno$Dose),
  large.data = FALSE
)
all.equal(Testperm$results, test$results)

# The summary function works on objects of class cpg.perm.
summary(Testperm)



###################################################
### code chunk number 4: CpGassoc.Rnw:351-357
###################################################
# Apply cpg.GC() to the output of a plain cpg.assoc() analysis.
library(CpGassoc)
data(samplecpg, samplepheno, package = "CpGassoc")

results <- cpg.assoc(samplecpg, samplepheno$weight, large.data = FALSE)

cpg.GC(results)
# If the genomic inflation factor is less than one, there is no need for
# adjustment.


###################################################
### code chunk number 5: CpGassoc.Rnw:418-419
###################################################
##See the examples in the CpGassoc tutorial.


###################################################
### code chunk number 6: CpGassoc.Rnw:511-513
###################################################
##See the examples listed for cpg.assoc for ways in which to use cpg.work;
##simply replace the call to cpg.assoc with a call to cpg.work.


###################################################
### code chunk number 7: CpGassoc.Rnw:569-582
###################################################
# Build design matrices with design() and inspect the full and reduced
# components of the returned object.
library(CpGassoc)
data(samplecpg, samplepheno, package = "CpGassoc")

# Example where there are covariates (weight and Distance) and a chip id.
# NOTE(review): the trailing FALSE is presumably the random-effects flag;
# confirm against ?design.
covar <- data.frame(samplepheno$weight, samplepheno$Distance)
test <- design(covar, samplepheno$SBP, samplepheno$chip, FALSE)

# Dimensions of both matrices, then their top-left 5 x 5 corners.
dim(test$full)
dim(test$reduced)
test$reduced[1:5, 1:5]
test$full[1:5, 1:5]

# When there are no covariates and no chip.id:
test2 <- design(NULL, samplepheno$SBP, NULL, FALSE)
dim(test2$full)
dim(test2$reduced)


###################################################
### code chunk number 8: manhattan
###################################################
# Manhattan plot. First load the data, including the annotation table.
library(CpGassoc)
data(samplecpg, samplepheno, annotation, package = "CpGassoc")

# NOTE: If you are dealing with large data, do not specify large.data = FALSE.
# The original note states that the default is TRUE (verify against
# ?cpg.assoc); that mode partitions the data and performs more gc() calls to
# clear up space.
examplemanhat <- cpg.assoc(
  samplecpg,
  samplepheno$Disease,
  large.data = FALSE
)

# Plot the results using the TargetID, CHR and MAPINFO annotation columns.
manhattan(
  examplemanhat,
  annotation$TargetID,
  annotation$CHR,
  annotation$MAPINFO
)



###################################################
### code chunk number 9: QQ_plot
###################################################
# Plot, sort and summary methods applied to cpg.assoc() output, using the
# covariate-adjusted example.
#
# NOTE: If you are dealing with large data, do not specify large.data = FALSE.
# The original note states that the default is TRUE (verify against
# ?cpg.assoc); that mode partitions the data and performs more gc() calls to
# clear up space.
library(CpGassoc)
data(samplecpg, samplepheno, package = "CpGassoc")

test <- cpg.assoc(
  samplecpg,
  samplepheno$weight,
  data.frame(samplepheno$Distance, samplepheno$Dose),
  large.data = FALSE
)

# QQ plot:
plot(test)

# t-statistic plot:
plot(test, tplot = TRUE)

# Now an example of sort: first rows of the results after sorting.
head(sort(test)$results)

# Summary
summary(test)


###################################################
### code chunk number 10: Perm_qqplot
###################################################
# Plot and sort methods applied to cpg.perm() output.
library(CpGassoc)
data(samplecpg, samplepheno, package = "CpGassoc")

# NOTE(review): the original comment said the analysis is done on a subset to
# save time, but the call below passes the full samplecpg object.
# NOTE: If you are dealing with large data, do not specify large.data = FALSE.
# The original note states that the default is TRUE (verify against
# ?cpg.perm); that mode partitions the data and performs more gc() calls to
# clear up space.
Testperm <- cpg.perm(
  samplecpg,
  samplepheno$weight,
  data.frame(samplepheno$Dose, samplepheno$Distance),
  seed = 2314,
  nperm = 10,
  large.data = FALSE
)

# The QQ plot:
plot(Testperm)

# The t-statistic plot from cpg.perm has confidence intervals, since
# permutations were performed on the T-values.
plot(Testperm, tplot = TRUE)
# If there were 100 or more permutations, there would be empirical confidence
# intervals.

# Now for sort: compare the first rows of the sorted and unsorted results.
head(sort(Testperm)$results)
head(Testperm$results)


###################################################
### code chunk number 11: Scatterplot
###################################################
# Scatterplots of individual CpG sites against the phenotype.
# Attach the package and load the data:
library(CpGassoc)
data(samplecpg, samplepheno, package = "CpGassoc")

# NOTE: If you are dealing with large data, do not specify large.data = FALSE.
# The original note states that the default is TRUE (verify against
# ?cpg.assoc); that mode partitions the data and performs more gc() calls to
# clear up space.
test <- cpg.assoc(samplecpg, samplepheno$weight, large.data = FALSE)

# Using rank: plots the top three sites in order of significance.
scatterplot(test, cpg.rank = 1:3, user.indep = samplepheno$weight)

# Using name: specify three sites explicitly.
scatterplot(
  test,
  cpg.name = c("CpG1182", "CpG1000", "CpG42"),
  user.indep = samplepheno$weight
)

# Plotting something that is categorical in nature:
test2 <- cpg.assoc(
  samplecpg,
  factor(samplepheno$Disease),
  large.data = FALSE
)
scatterplot(test2, 2, user.indep = as.factor(samplepheno$Disease))