## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
suppressMessages(library(EMclassifieR))
## -----------------------------------------------------------------------------
tablePath="csv/MatrixLog_relCoord_ampTSS.csv"
matTable<-read.csv(system.file("extdata", tablePath, package = "EMclassifieR",
mustWork=TRUE), stringsAsFactors=F)
head(matTable)
## -----------------------------------------------------------------------------
i=1
dataMatrix<-readRDS(system.file("extdata", matTable$filename[i], package = "EMclassifieR", mustWork=TRUE))
dim(dataMatrix)
dataMatrix<-removeNArows(dataMatrix)
dim(dataMatrix)
# dataMatrix<-readRDS("~/Documents/MeisterLab/myPackages/EMclassifieR/vignettes/EMres_cosine/dS02-182_WBGene00015947_K2.rds")
# dataMatrix<-removeNArows(dataMatrix)
#
# colourChoice=list(low="blue", mid="white", high="red", bg="white", lines="grey80")
# colourChoice=list(low="yellow", mid="grey20", high="red", bg="black", lines="grey80")
#
#
# p<-plotClassesSingleMolecule(dataMatrix,
# xRange=c(-250,250), title="Reads by classes",
# myXlab="CpG/GpC position",
# featureLabel="TSS", baseFontSize=12,
# segmentSize=5,
# colourChoice=colourChoice)
# print(p)
## ----singleGeneEM,eval=F------------------------------------------------------
#
# k_range = 2:8 # Number of classes to be found
# maxIterations = 100 # number of iterations of EM clustering to perform if it does not converge
# convergenceError = 10e-6
# numRepeats=10 # number of repeats of clustering each matrix (to account for fraction of methylation)
# xRange=c(-250,250)
# maxB=50 # Number of randomised matrices to generate
# outPath="./EMres"
# maxTasks=4
# taskId=1
# nThreads=4
# setSeed=FALSE
# distMetric=list(name="cosineDist",rescale=T)
#
# if(!dir.exists(outPath)){
# dir.create(outPath)
# }
#
# #split table indicies into nTasks number of groups
# taskSubList<-split(1:nrow(matTable),sort(1:nrow(matTable)%%maxTasks))
#
# set.seed(200413)
# for (i in taskSubList[[taskId]]){
# #for (i in 1:nrow(matTable)) {
# regionName=matTable$region[i]
# sampleName=matTable$sample[i]
# outFileBase=paste(sampleName, regionName, sep="_")
# print(paste("Clustering", outFileBase))
# dataMatrix<-readRDS(system.file("extdata", matTable$filename[i],
# package = "EMclassifieR", mustWork=TRUE))
# dim(dataMatrix)
# dataMatrix<-removeNArows(dataMatrix,maxNAfraction=0.2)
# #dataMatrix<-recodeMatrixAsNumeric(dataMatrix)
# dim(dataMatrix)
#
# allClassMeans<-tryCatch(
# {
# print("running EM for a range of class numbers")
# runEMrangeClassNum(dataMatrix, k_range, convergenceError, maxIterations,
# EMrepeats=numRepeats, outPath=outPath, xRange=xRange,
# outFileBase=paste(sampleName, regionName, sep="_"),
# doIndividualPlots=FALSE, distMetric=distMetric)
#
# },
# error=function(e){"Matrix not valid"}
# )
#
# if(is.list(allClassMeans)){
# saveRDS(allClassMeans,paste0(outPath,"/allClassMeans_",outFileBase,".rds"))
# } else {
# print(allClassMeans) # error message
# }
#
# clustMetrics<-tryCatch(
# {
# print("plotting clustering metrics for a range of class sizes")
# plotClusteringMetrics(dataMatrix, k_range, maxB, convergenceError,
# maxIterations, outPath, outFileBase, EMrep=NULL, nThreads=nThreads,
# setSeed=setSeed, distMetric=distMetric)
# },
# error=function(e){"Matrix not valid"}
# )
# if(length(clustMetrics)==1) {
# print(clustMetrics)
# }
#
# pcaPlots<-tryCatch(
# {
# print("plotting PCA of clusters")
# plotPCAofMatrixClasses(k_range, outPath, outFileBase)
# },
# error=function(e){"Matrix not valid"}
# )
# if(length(pcaPlots)==1) {
# print(pcaPlots)
# }
#
# umapPlots<-tryCatch(
# {
# print("plotting UMAP of clusters")
# plotUMAPofMatrixClasses(k_range, outPath, outFileBase)
# },
# error=function(e){"Matrix not valid"}
# )
# if(length(umapPlots)==1) {
# print(umapPlots)
# }
#
# }
#
## ----classesWithTracks,eval=F-------------------------------------------------
#
#
# dataMatrix<-readRDS(system.file("extdata", matTable$filename[i], package = "EMclassifieR", mustWork=TRUE))
# regionName=matTable$region[i]
# sampleName=matTable$sample[i]
# outFileBase=paste(sampleName, regionName, sep="_")
#
# allClassMeansList<-readRDS("/Users/semple/Documents/MeisterLab/myPackages/EMclassifieR/inst/extdata/rds/allClassMeans_dS02-182_WBGene00015947.rds")
# numClasses=5
# allClassMeans<-allClassMeansList[[numClasses]]
# allClassMeans
#
# ampTSS<-readRDS('/Users/semple/Documents/MeisterLab/myPackages/EMclassifieR/inst/extdata/rds/ampliconMaxTSSgr.RDS')
#
#
# winSize=500
# tssWin<-ampTSS
# GenomicRanges::mcols(tssWin)$TSS<-GenomicRanges::start(tssWin)
# tssWin<-GenomicRanges::resize(tssWin,width=winSize,fix="center")
#
#
# names(GenomicRanges::mcols(tssWin))[1]<-"ID"
# regionGR<-tssWin[i]
# featureGR<-ampTSS[i]
# regionName=matTable$region[i]
# sampleName=matTable$sample[i]
# outFileBase=paste(sampleName, regionName, sep="_")
# #seqlevels(ampTSS)<-seqlevels(Celegans)
# #seqinfo(ampTSS)<-seqinfo(Celegans)
# #saveRDS(ampTSS,'/Users/semple/Documents/MeisterLab/myPackages/EMclassifieR/inst/extdata/rds/ampliconMaxTSSgr.RDS')
#
# # genomeVer="WS275"
# # genomeDir=paste0("~/Documents/MeisterLab/GenomeVer/",genomeVer)
# # txdb<-AnnotationDbi::loadDb(paste0(genomeDir,
# # "/annotations/c_elegans.PRJNA13758.",
# # genomeVer, ".annotations.sqlite"))
# # seqlevelsStyle(txdb)<-"ucsc"
# # GenomeInfoDb::genome(txdb)<-GenomeInfoDb::genome(regionGR)
# #library("TxDb.Celegans.UCSC.ce11.refGene")
# txdb <- TxDb.Celegans.UCSC.ce11.refGene::TxDb.Celegans.UCSC.ce11.refGene
# bigwigListFile<-"~/Documents/MeisterLab/sequencingData/public/bigwigFileList.txt"
# #bigwigListFile<-"~/Documents/MeisterLab/sequencingData/public/bigwigFileList_kranz.txt"
# strandedBwFile<-"~/Documents/MeisterLab/sequencingData/public/strandedBwFile.txt"
# gr<-allClassMeansToGR(allClassMeans, regionGR)
#
# dTrack<-Gviz::DataTrack(gr,name="dSMF Classes")
# Gviz::plotTracks(dTrack, groups=rep(c(paste0("class",1:numClasses)), times=10),
# type=c("a","p","confint"))
#
# print(txdb)
# outPath="./EMres"
# grDevices::pdf(paste0(outPath,"/tracks_",
# outFileBase,"_K",
# numClasses, ".pdf"),
# paper="a4", height=11, width=8)
#
# plotClassMeansWithTracks(allClassMeans, regionGR, "middle", txdb, featureGR,
# bigwigListFile, strandedBwFile)
# dev.off()
## ----multigeneEM, eval=F------------------------------------------------------
# tablePath="csv/MatrixLog_relCoord_ampTSS.csv"
# matTable<-read.csv(system.file("extdata", tablePath, package = "EMclassifieR",
# mustWork=TRUE), stringsAsFactors=F)
# matTable<-matTable[!is.na(matTable$filename),]
#
# head(matTable)
# multiGeneMat<-NULL
# genesIncluded<-0
# minReads=50
# binSize=20
# maxNAfraction=0.2
#
# for(i in 1:nrow(matTable[matTable$sample=="dS02-182"])){
# regionName=matTable$region[i]
# sampleName=matTable$sample[i]
# outFileBase=paste(sampleName, regionName, sep="_")
# print(paste0("reading matrix number ",i))
# dataMatrix<-readRDS(system.file("extdata", matTable$filename[i],
# package = "EMclassifieR", mustWork=TRUE))
# #Make sure matrix rows do not have too many NAs
# dataMatrix<-removeNArows(dataMatrix, maxNAfraction=maxNAfraction, removeAll0=T)
# subMatrix<-selectReadsFromMatrix(dataMatrix,minReads=minReads,
# addToReadName=outFileBase,
# preferBest=T)
# if(!is.null(subMatrix)){
# fullMatrix<-getFullMatrix(subMatrix)
# winMatrix<-prepareWindows(fullMatrix, binSize=binSize)
# genesIncluded<-genesIncluded+1
# if(is.null(multiGeneMat)){
# multiGeneMat<-winMatrix
# } else {
# multiGeneMat<-rbind(multiGeneMat,winMatrix)
# }
# }
# }
# print(paste(genesIncluded,"genes included in the multi gene matrix"))
#
# #multiGeneMat<-rescale_minus1To1(multiGeneMat)
# #multiGeneMat<-rescale_0To1(multiGeneMat)
# #multiGeneMat<-recodeMatrixAsNumeric(multiGeneMat)
#
# k_range = 2:8 # Number of classes to be found
# maxIterations = 100 # number of iterations of EM clustering to perform if it does not converge
# convergenceError = 10e-6
# numRepeats=10 # number of repeats of clustering each matrix (to account for fraction of methylation)
# xRange=c(-250,250)
# maxB=50 # Number of randomised matrices to generate
# outPath="./EMres_multigene_test_mi"
# maxTasks=4
# taskId=1
# nThreads=4
# setSeed=FALSE
# #distMetric=list(name="cosineDist",valNA=0,rescale=F)
# #distMetric=list(name="canberraDist",rescale=T)
# #distMetric=list(name="correlationDist",valNA=0.5,rescale=T)
# #distMetric=list(name="euclidean",valNA=0.5,rescale=T)
# distMetric=list(name="mutualInformation",valNA=0.5,rescale=F)
#
# if(!dir.exists(outPath)){
# dir.create(outPath)
# }
#
#
# set.seed(200413)
#
# regionName="multiGene"
# sampleName="dS02-182"
# outFileBase=paste(sampleName, regionName, sep="_")
# print(paste("Clustering", outFileBase))
# dataMatrix<-multiGeneMat
# dim(dataMatrix)
# #dataMatrix<-removeNArows(dataMatrix,maxNAfraction=0)
# dim(dataMatrix)
#
# pdf(file=paste0("hist_w",binSize,"_NA",maxNAfraction,"_genes",genesIncluded,".pdf"),width=11, height=8, paper="a4r")
# hist(rowSums(is.na(multiGeneMat)),main=paste0("winSize: ",binSize,", maxNA: ",maxNAfraction,
# ", genesIncluded:",genesIncluded),
# xlab="number of NA windows per molecule",xlim=c(0,500),col="blue")
# dev.off()
# allClassMeans<-tryCatch(
# {
# print("running EM for a range of class numbers")
# runEMrangeClassNum(dataMatrix, k_range, convergenceError, maxIterations,
# EMrepeats=numRepeats, outPath=outPath, xRange=xRange,
# outFileBase=paste(sampleName, regionName, sep="_"),
# doIndividualPlots=FALSE, distMetric=distMetric)
#
# },
# error=function(e){"Matrix not valid"}
# )
#
# if(is.list(allClassMeans)){
# saveRDS(allClassMeans,paste0(outPath,"/allClassMeans_",outFileBase,".rds"))
# } else {
# print(allClassMeans) # error message
# }
#
# clustMetrics<-tryCatch(
# {
# print("plotting clustering metrics for a range of class sizes")
# plotClusteringMetrics(dataMatrix, k_range, maxB, convergenceError,
# maxIterations, outPath, outFileBase, EMrep=NULL,
# nThreads=nThreads,
# setSeed=setSeed, distMetric=distMetric)
# },
# error=function(e){"Matrix not valid"}
# )
# if(length(clustMetrics)==1) {
# print(clustMetrics)
# }
#
# pcaPlots<-tryCatch(
# {
# print("plotting PCA of clusters")
# plotPCAofMatrixClasses(k_range, outPath, outFileBase)
# },
# error=function(e){"Matrix not valid"}
# )
# if(length(pcaPlots)==1) {
# print(pcaPlots)
# }
#
# umapPlots<-tryCatch(
# {
# print("plotting UMAP of clusters")
# plotUMAPofMatrixClasses(k_range, outPath, outFileBase)
# },
# error=function(e){"Matrix not valid"}
# )
# if(length(umapPlots)==1) {
# print(umapPlots)
# }
#
# print("plotting classes per gene")
# plotGenesPerClass(k_range, outPath, outFileBase)
#
# # to use -1 to 1
# #https://stats.stackexchange.com/questions/256917/can-an-unstandarized-beta-distribution-have-a-negative-domain
#
## ----classifyWithKnownClasses,eval=F------------------------------------------
# allClassMeans<-readRDS(system.file("extdata",
# "EMres/allClassMeans_dS02-182_multiGene.rds",
# package = "EMclassifieR", mustWork=T))
#
# classes<-getClassMeansMatrix(allClassMeans,numClasses=6)
#
#
#
# classes<-as.matrix(readRDS(paste0("../../../sequencingData/TCI_clusters/euc8-10_w30/classMeans_w30_euc.rds")))
# classes<-classes[order(rowMeans(classes)),]
# saveRDS(classes,paste0("../../../sequencingData/TCI_clusters/euc8-10_w30/classMeans_w30_euc_sort.rds"))
# rowMeans(classes)
# classes<-padEndToRange(classes,xRange=c(-250,250))
# classes<-readRDS(paste0("../../../sequencingData/TCI_clusters/euc8-10_w30/classMeans_w30_euc_sort.rds"))
#
# p<-plotClassMeans(classes,xRange=c(-250,250), facet=TRUE,
# title="Class means",
# myXlab="CpG/GpC position",featureLabel="TSS",
# baseFontSize=12)
#
# ggplot2::ggsave("../../../sequencingData/TCI_clusters/euc8-10_w30/classMeans_w30_euc_sort.pdf",p,
# height=11,width=4,device="pdf")
#
# dataMatrix<-readRDS(system.file("extdata",
# "EMres/dS02-182_multiGene_K6.rds",
# package="EMclassifieR", mustWork=T))
# dataMatrix<-readRDS(system.file("extdata",
# "rds/relCoord_ampTSS/dS02-182_WBGene00009620.rds",
# package="EMclassifieR", mustWork=T))
#
# orderedMat<-dataMatrix
# dataMatrix<-dataMatrix[sample(1:dim(dataMatrix)[1],replace=F),]
#
# rownames(dataMatrix)<-gsub("__class.*$","",rownames(dataMatrix))
#
# distMetric=list(name="euclidean",rescale=T) #list(name="euclidean")
#
# dataOrderedByClass<-runClassLikelihoodRpts(dataMatrix, classes,
# numRepeats=20, outPath=".",
# xRange=c(-250,250), outFileBase="test",
# distMetric=distMetric, classesToPlot=NULL)
#
# dataMatrix<-dataOrderedByClass[-grep("__class0[1,2,3,4]",row.names(dataOrderedByClass)),]
#
# tablePath="csv/MatrixLog_relCoord_ampTSS.csv"
# matTable<-read.csv(system.file("extdata", tablePath, package = "EMclassifieR",
# mustWork=TRUE), stringsAsFactors=F)
#
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.