dipExtension <- function(breaks, labels, RNAdata, rawRNAdata, minimumCounts){
x.1 <- 0
x.2 <- 0
x.3 <- 0
x.4 <- 0
if(missing(breaks)) {
x.1 <- 0; print("cutting = FALSE")
} else {
x.1 <-1
}
if(missing(labels)) {
x.2 <- 0
} else {
x.2 <-1
}
if(missing(rawRNAdata)) {
x.3 <- 0; print("filtering = FALSE")
} else {
x.3 <-1
}
if(missing(minimumCounts)) {
minimumCounts <- 0
}
print("reading in data")
if(x.1==1) {if(length(breaks) != (length(labels) + 1)) {print("invalid dimensions of breaks and labels")}}
if(mode(RNAdata)=="character") {
RNAdata <- as.matrix(read.table(RNAdata))
}
RNAdataDF <- data.frame(RNAdata)
#filtering of RNAdataDF and DipOutputDF. If the DipOutputDF one fails, move this filter in front of the DipOutputDF creation.
if(x.3 == 1) { print("filtering")
rawRNAdata <- read.table(rawRNAdata)
#row.names(DipOutputDF) <- row.names(RNAdataDF)
rawRNAdata$max <- 0
rawRNAdata$max <- apply(rawRNAdata, 1, max)
RNArawbelow <- rawRNAdata[which(rawRNAdata$max<minimumCounts),]
rr <- row.names(RNArawbelow)
RNAdataDF_R <- RNAdataDF[!(row.names(RNAdataDF) %in% rr),]
RNAdataMat <- as.matrix(RNAdataDF_R)
DipOutput <- matrix(nrow=nrow(RNAdataDF_R), ncol=1)
DipOutputPvals <- matrix(nrow=nrow(RNAdataDF_R), ncol=1)
#DipOutputDF <- DipOutputDF[!(row.names(DipOutputDF)) %in% rr,]
#row.names(DipOutputDF) <- c(1:nrow(DipOutputDF))
}
if(x.3 == 0) {
RNAdataDF_R <- RNAdataDF
DipOutput <- matrix(nrow=nrow(RNAdataDF_R), ncol=1)
DipOutputPvals <- matrix(nrow=nrow(RNAdataDF_R), ncol=1)
}
print("conducting dip analysis")
for (k in 1:nrow(RNAdataMat)){ #dip analysis
e <- diptest::dip(RNAdataMat[k,], full.result = FALSE, min.is.0 = FALSE, debug = FALSE)
f <- diptest::dip.test(RNAdataMat[k,], simulate.p.value = FALSE, B = 2000)
DipOutput[k,1] <- e #save output into pre-prepared matrix
DipOutputPvals[k,1] <- f$p_value
}
DipOutputDF <- data.frame(DipOutput)
DipOutputPDF <- data.frame(DipOutputPvals)
DipOutputDF$GeneID <- rownames(RNAdataDF_R)
DipOutputDF$Dip_p_Vals <- DipOutputPDF[,1]
#cutting
if(x.1 == 1 && x.2 == 1) {
print("cutting")
DipOutputDF$Region <- cut(DipOutputDF$DipOutput,
breaks = breaks,
labels = labels,
right = FALSE)
DipOutputDF$Region <- as.numeric(as.character(DipOutputDF$Region))
} #breaks
ZeroXMedian <- matrix(nrow=nrow(RNAdataDF_R), ncol=1)
ZeroIMedian <- matrix(nrow=nrow(RNAdataDF_R), ncol=1)
row.names(ZeroXMedian) <- row.names(RNAdataDF_R)
colnames(ZeroXMedian) <- c("Zero-excluded Median")
row.names(ZeroIMedian) <- row.names(RNAdataDF_R)
colnames(ZeroIMedian) <- c("Zero-included Median")
print("calculating ZXM and ZIM")
for(k in 1:nrow(RNAdataDF_R)) {
shelf <- RNAdataDF_R[k,] #which(RNAdataDF[k,i] >= 0)
row.names(shelf) <- c(1)
shelf2 <- shelf[,(shelf[1,]>0)]
ZeroXMedian[k,] <- median(as.numeric(shelf2))
ZeroIMedian[k,] <- median(as.numeric(shelf))
}
ReducedZeroXMedian <- ZeroXMedian[!(row.names(ZeroXMedian) %in% rr),]
DipOutputDF$ZeroXMedian <- as.numeric(ZeroXMedian)
DipOutputDF$ZeroIMedian <- as.numeric(ZeroIMedian)
DipOutputDF <- mutate(DipOutputDF, Divergence = ZeroXMedian-ZeroIMedian)
DipOutputDF <- DipOutputDF[,c(2, 1, 3, 4, 5, 6)]
}
plotSamplesByDipRegion <- function(minimumCounts, breaks, labels, rawRNAdata,
RNAdata, xlab, samples) {
# parameters: filter, break locations (1-4 OR "NONE"), labels,
# rawRNAdata, normRNAdata, xlab(xx) (make it work from 1 to 5. No default), samples(number to be sampled)
xx <- xlab
if(missing(filter)) {print("no filter applied")}
if(missing(RNAdata) && missing(rawRNAdata)) {print("RNA data missing")}
if(missing(breaks)) {print("no breaks selected"); regions <- 1}
else(regions <- (length(breaks) + 1))
if(missing(RNAdata) && !missing(rawRNAdata)) {RNAdata <- rawRNAdata}
if(missing(samples)) {samples <- 4}
#RNAdata <- read.table("/Users/sieker/Dropbox/Jeremy Rotation Files/normalized counts/DESeq_normalizedcounts_iNsEndoNsMEFs.txt")
RNAdata <- read.table(RNAdata)
#RNArawcounts <- data.frame(read.table("/Users/sieker/Dropbox/Jeremy Rotation Files/rawcounts/DESeq_rawcounts_iNsEndoNsMEFs.txt"))
RNArawcounts <- read.table(RNAdata)
RNAdataDF <- data.frame(RNAdata)
RNArawcounts$max <- 0
RNArawcounts$max <- apply(RNArawcounts, 1, max)
RNArawbelow50 <- RNArawcounts[which(RNArawcounts$max<minimumCounts),]
rr <- row.names(RNArawbelow50)
RNAdataDF_R <- RNAdataDF[(!row.names(RNAdataDF) %in% rr), ]
RNAdataMat <- as.matrix(RNAdataDF_R)
DipOutput <- matrix(nrow=nrow(RNAdataDF_R), ncol=1) #preparing a matrix to receive output in the function step
DipOutputPvals <- matrix(nrow=nrow(RNAdataDF_R), ncol=1) #preparing a matrix to receive output in the function step
#setup and calculation
for (k in 1:nrow(RNAdataMat)){
e <- diptest::dip(RNAdataMat[k,], full.result = FALSE, min.is.0 = FALSE, debug = FALSE) #actual dip statistical test
f <- diptest::dip.test(RNAdataMat[k,], simulate.p.value = FALSE, B = 2000)
DipOutput[k,1] <- e #save output into pre-prepared matrix
DipOutputPvals[k,1] <- f$p_value
##consider extracting the rownames off RNAdata as a vector, which you can then reimpute here
}
DipOutputDF <- data.frame(DipOutput) # matrix -> dataframe
DipOutputPDF <- data.frame(DipOutputPvals)
DipOutputDF$GeneID <- rownames(RNAdataDF_R)
if(regions==1){labels = "Undivided Sample"}
if(regions==2){labels = c("Region A", "Region B")}
if(regions==3){labels = c("Region A", "Region B", "Region C")}
if(regions==4){labels = c("Region A", "Region B", "Region C", "Region D")}
if(regions==5){labels = c("Region A", "Region B", "Region C", "Region D", "Region E")}
if(breaks!="NONE" | (is.numeric(breaks) && breaks > 0)){
DipOutputDF$Region <- cut(DipOutputDF$DipOutput,
breaks = breaks,
labels = labels,
right = FALSE)
DipOutputDF$Region <- as.numeric(as.character(DipOutputDF$Region))
}
ZeroXMedian <- matrix(nrow=nrow(RNAdataDF_R), ncol=1)
ZeroIMedian <- matrix(nrow=nrow(RNAdataDF_R), ncol=1)
row.names(ZeroXMedian) <- row.names(RNAdataDF_R)
colnames(ZeroXMedian) <- c("Zero-excluded Median")
row.names(ZeroIMedian) <- row.names(RNAdataDF_R)
colnames(ZeroIMedian) <- c("Zero-included Median")
for(k in 1:nrow(RNAdataDF_R)) {
shelf <- RNAdataDF_R[k,]
row.names(shelf) <- c(1)
shelf2 <- shelf[,(shelf[1,]>0)]
ZeroXMedian[k,] <- median(as.numeric(shelf2))
ZeroIMedian[k,] <- median(as.numeric(shelf))
}
DipOutputDF$ZeroXMedian <- as.numeric(ZeroXMedian)
DipOutputDF$ZeroIMedian <- as.numeric(ZeroIMedian)
DipOutputDF <- mutate(DipOutputDF, Divergence = ZeroXMedian-ZeroIMedian)
return(DipOutputDF)
if(samples==1){sublabels = "Sample A"}
if(samples==2){sublabels = c("Sample A", "Sample B")}
if(samples==3){sublabels = c("Sample A", "Sample B", "Sample C")}
if(samples==4){sublabels = c("Sample A", "Sample B", "Sample C", "Sample D")}
if(samples==5){sublabels = c("Sample A", "Sample B", "Sample C", "Sample D", "Sample E")}
if(samples==6){sublabels = c("Sample A", "Sample B", "Sample C", "Sample D", "Sample E", "Sample F")}
#plotting (executables)
if(regions>=1){ #pick four from whole distribution
ifelse(regions==1, DF.1 <- DipOutputDF, DF.1 <- DipOutputDF[DipOutputDF$Region==labels[1],])
ifelse(nrow(DF.1)<samples, print("error: too few observations
remaining in region 1 for declared sampling number to be selected"), print("Region 1 samples selected"))
DF.1Samples <- DF.1[sample(nrow(DF.1), size = samples, replace = FALSE),]
DF.1Names <- DF.1Samples[,"GeneID"]
DF.1SampleGeneData <- data.frame(t(RNAdataDF_R[row.names(RNAdataDF_R) %in% DF.1Names, ]))
SampleNames <- DF.1Names
R1A <- paste(SampleNames[1], collapse = NULL)
pp1 <- ggplot2::ggplot(DF.1SampleGeneData, aes_string(x=R1A)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1A) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
if(samples>=2){
R1B <- paste(SampleNames[2], collapse = NULL)
pp2 <- ggplot2::ggplot(DF.1SampleGeneData, aes_string(x=R1B)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::xlab(xx) + ggplot2::ggtitle(R1B) + ggplot2::ylab("Density")
}
if(samples>=3){
R1C <- paste(SampleNames[3], collapse = NULL)
pp3 <- ggplot2::ggplot(DF.1SampleGeneData, aes_string(x=R1C)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1C) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=4){
R1D <- paste(SampleNames[4], collapse = NULL)
pp4 <- ggplot2::ggplot(DF.1SampleGeneData, aes_string(x=R1D)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1D) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=5){
R1E <- paste(SampleNames[5], collapse = NULL)
pp5 <- ggplot2::ggplot(DF.1SampleGeneData, aes_string(x=R1E)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1E) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=6){
R1F <- paste(SampleNames[6], collapse = NULL)
pp6 <- ggplot2::ggplot(DF.1SampleGeneData, aes_string(x=R1F)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1F) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples==1){x1 = pp1}
if(samples==2){x1 = gridExtra::grid.arrange(pp1, pp2, ncol=2)}
if(samples==3){x1 = gridExtra::grid.arrange(pp1, pp2, pp3, ncol=3)}
if(samples==4){x1 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, ncol=2)}
if(samples==5){x1 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, ncol=2)}
if(samples==6){x1 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, pp6, ncol=2)}
}
if(regions>=2){
DF.2 <- DipOutputDF[DipOutputDF$Region==labels[2],]
ifelse(nrow(DF.2)<samples, print("error: too few observations
remaining in region 2 for declared number of samples to be selected"), print("Region 2 samples selected"))
DF.2Samples <- DF.2[sample(nrow(DF.2), size = samples, replace = FALSE),]
DF.2Names <- DF.2Samples[,"GeneID"]
DF.2SampleGeneData <- data.frame(t(RNAdataDF_R[row.names(RNAdataDF_R) %in% DF.2Names, ]))
SampleNames2 <- DF.2Names
R2A <- paste(SampleNames2[1], collapse = NULL)
pp1 <- ggplot2::ggplot(DF.2SampleGeneData, aes_string(x=R2A)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1A) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
if(samples>=2){
R2B <- paste(SampleNames2[2], collapse = NULL)
pp2 <- ggplot2::ggplot(DF.2SampleGeneData, aes_string(x=R2B)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::xlab(xx) + ggplot2::ggtitle(R1B) + ggplot2::ylab("Density")
}
if(samples>=3){
R2C <- paste(SampleNames2[3], collapse = NULL)
pp3 <- ggplot2::ggplot(DF.2SampleGeneData, aes_string(x=R2C)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1C) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=4){
R2D <- paste(SampleNames2[4], collapse = NULL)
pp4 <- ggplot2::ggplot(DF.2SampleGeneData, aes_string(x=R2D)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1D) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=5){
R2E <- paste(SampleNames2[5], collapse = NULL)
pp5 <- ggplot2::ggplot(DF.2SampleGeneData, aes_string(x=R2E)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1E) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=6){
R2F <- paste(SampleNames2[6], collapse = NULL)
pp6 <- ggplot2::ggplot(DF.2SampleGeneData, aes_string(x=R2F)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R1F) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples==1){x2 = pp1}
if(samples==2){x2 = gridExtra::grid.arrange(pp1, pp2, ncol=2)}
if(samples==3){x2 = gridExtra::grid.arrange(pp1, pp2, pp3, ncol=3)}
if(samples==4){x2 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, ncol=2)}
if(samples==5){x2 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, ncol=2)}
if(samples==6){x2 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, pp6, ncol=2)}
}
if(regions>=3){
DF.3 <- DipOutputDF[DipOutputDF$Region==labels[3],]
ifelse(nrow(DF.3)<samples, print("error: too few observations
remaining in region 3 for declared number of samples to be selected"), print("Region 3 samples selected"))
DF.3Samples <- DF.3[sample(nrow(DF.3), size = samples, replace = FALSE),]
DF.3Names <- DF.3Samples[,"GeneID"]
DF.3SampleGeneData <- data.frame(t(RNAdataDF_R[row.names(RNAdataDF_R) %in% DF.3Names, ]))
SampleNames3 <- DF.3Names
R3A <- paste(SampleNames3[1], collapse = NULL)
pp1 <- ggplot2::ggplot(DF.3SampleGeneData, aes_string(x=R3A)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R3A) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
if(samples>=2){
R3B <- paste(SampleNames3[2], collapse = NULL)
pp2 <- ggplot2::ggplot(DF.3SampleGeneData, aes_string(x=R3B)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::xlab(xx) + ggplot2::ggtitle(R3B) + ggplot2::ylab("Density")
}
if(samples>=3){
R3C <- paste(SampleNames3[3], collapse = NULL)
pp3 <- ggplot2::ggplot(DF.3SampleGeneData, aes_string(x=R3C)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R3C) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=4){
R3D <- paste(SampleNames3[4], collapse = NULL)
pp4 <- ggplot2::ggplot(DF.3SampleGeneData, aes_string(x=R3D)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R3D) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=5){
R3E <- paste(SampleNames3[5], collapse = NULL)
pp5 <- ggplot2::ggplot(DF.3SampleGeneData, aes_string(x=R3E)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R3E) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=6){
R3F <- paste(SampleNames3[6], collapse = NULL)
pp6 <- ggplot2::ggplot(DF.2SampleGeneData, aes_string(x=R3F)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R3F) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples==1){x3 = pp1}
if(samples==2){x3 = gridExtra::grid.arrange(pp1, pp2, ncol=2)}
if(samples==3){x3 = gridExtra::grid.arrange(pp1, pp2, pp3, ncol=3)}
if(samples==4){x3 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, ncol=2)}
if(samples==5){x3 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, ncol=2)}
if(samples==6){x3 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, pp6, ncol=2)}
}
if(regions>=4){
DF.4 <- DipOutputDF[DipOutputDF$Region==labels[4],]
ifelse(nrow(DF.4)<samples, print("error: too few observations
remaining in region 4 for declared number of samples to be selected"), print("Region 4 samples selected"))
DF.4Samples <- DF.4[sample(nrow(DF.4), size = samples, replace = FALSE),]
DF.4Names <- DF.4Samples[,"GeneID"]
DF.4SampleGeneData <- data.frame(t(RNAdataDF_R[row.names(RNAdataDF_R) %in% DF.4Names, ]))
SampleNames4 <- DF.4Names
R4A <- paste(SampleNames4[1], collapse = NULL)
pp1 <- ggplot2::ggplot(DF.4SampleGeneData, aes_string(x=R4A)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R4A) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
if(samples>=2){
R4B <- paste(SampleNames4[2], collapse = NULL)
pp2 <- ggplot2::ggplot(DF.4SampleGeneData, aes_string(x=R4B)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::xlab(xx) + ggplot2::ggtitle(R4B) + ggplot2::ylab("Density")
}
if(samples>=3){
R4C <- paste(SampleNames4[3], collapse = NULL)
pp3 <- ggplot2::ggplot(DF.4SampleGeneData, aes_string(x=R4C)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R4C) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=4){
R4D <- paste(SampleNames4[4], collapse = NULL)
pp4 <- ggplot2::ggplot(DF.4SampleGeneData, aes_string(x=R4D)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R4D) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=5){
R4E <- paste(SampleNames4[5], collapse = NULL)
pp5 <- ggplot2::ggplot(DF.4SampleGeneData, aes_string(x=R4E)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R4E) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=6){
R4F <- paste(SampleNames4[6], collapse = NULL)
pp6 <- ggplot2::ggplot(DF.4SampleGeneData, aes_string(x=R4F)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R4F) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples==1){x4 = pp1}
if(samples==2){x4 = gridExtra::grid.arrange(pp1, pp2, ncol=2)}
if(samples==3){x4 = gridExtra::grid.arrange(pp1, pp2, pp3, ncol=3)}
if(samples==4){x4 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, ncol=2)}
if(samples==5){x4 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, ncol=2)}
if(samples==6){x4 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, pp6, ncol=2)}
}
if(regions>=5){
DF.5 <- DipOutputDF[DipOutputDF$Region==labels[5],]
ifelse(nrow(DF.5)<samples, print("error: too few observations
remaining in region 5 for declared number of samples to be selected"), print("Region 5 samples selected"))
DF.5Samples <- DF.5[sample(nrow(DF.5), size = samples, replace = FALSE),]
DF.5Names <- DF.5Samples[,"GeneID"]
DF.5SampleGeneData <- data.frame(t(RNAdataDF_R[row.names(RNAdataDF_R) %in% DF.5Names, ]))
SampleNames5 <- DF.5Names
R5A <- paste(SampleNames5[1], collapse = NULL)
pp1 <- ggplot2::ggplot(DF.5SampleGeneData, aes_string(x=R5A)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R5A) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
if(samples>=2){
R2B <- paste(SampleNames5[2], collapse = NULL)
pp2 <- ggplot2::ggplot(DF.5SampleGeneData, aes_string(x=R5B)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::xlab(xx) + ggplot2::ggtitle(R5B) + ggplot2::ylab("Density")
}
if(samples>=3){
R5C <- paste(SampleNames5[3], collapse = NULL)
pp3 <- ggplot2::ggplot(DF.5SampleGeneData, aes_string(x=R5C)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R5C) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=4){
R2D <- paste(SampleNames5[4], collapse = NULL)
pp4 <- ggplot2::ggplot(DF.5SampleGeneData, aes_string(x=R5D)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R5D) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=5){
R5E <- paste(SampleNames5[5], collapse = NULL)
pp5 <- ggplot2::ggplot(DF.5SampleGeneData, aes_string(x=R5E)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R5E) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples>=6){
R5F <- paste(SampleNames5[6], collapse = NULL)
pp6 <- ggplot2::ggplot(DF.5SampleGeneData, aes_string(x=R5F)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() + ggplot2::ggtitle(R5F) + ggplot2::ylab("Density") + ggplot2::xlab(xx)
}
if(samples==1){x5 = pp1}
if(samples==2){x5 = gridExtra::grid.arrange(pp1, pp2, ncol=2)}
if(samples==3){x5 = gridExtra::grid.arrange(pp1, pp2, pp3, ncol=3)}
if(samples==4){x5 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, ncol=2)}
if(samples==5){x5 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, ncol=2)}
if(samples==6){x5 = gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, pp6, ncol=2)}
}
return(x1)
try(return(x2))
try(return(x3))
try(return(x4))
try(return(x5))
}
plotSamplesByName <- function(nameList, RNAdata) {
if(missing(ncol) && length(nameList)==any(c(2, 4, 6))) {
ncol <- 2
}
if(missing(ncol) && length(nameList)==any(c(3, 5, 7))) {
ncol <- 3
}
if(missing(ncol) && length(nameList)==1) {
ncol <- 1
}
if(missing(ncol) && length(nameList)==8) {
ncol <- 4
}
RNAdataDF <- data.frame(read.table(RNAdata))
GeneData <- data.frame(t(RNAdataDF[row.nameList(RNAdataDF) %in% nameList, ]))
R1 <- paste(nameList[1], collapse = NULL)
try(R2 <- paste(nameList[2], collapse = NULL))
try(R3 <- paste(nameList[3], collapse = NULL))
try(R4 <- paste(nameList[4], collapse = NULL))
try(R5 <- paste(nameList[5], collapse = NULL))
try(R6 <- paste(nameList[6], collapse = NULL))
try(R7 <- paste(nameList[7], collapse = NULL))
try(R8 <- paste(nameList[8], collapse = NULL))
pp1 <- ggplot2::ggplot(GeneData, aes_string(x=R1)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() +
ggplot2::ggtitle(R1) + ggplot2::ylab("Density") + ggplot2::xlab("Relative Expression")
if(!is.na(nameList[2])) {
pp2 <- ggplot2::ggplot(GeneData, aes_string(x=R2)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() +
ggplot2::ggtitle(R2) + ggplot2::ylab("Density") + ggplot2::xlab("Relative Expression")
}
if(!is.na(nameList[3])) {
pp3 <- ggplot2::ggplot(GeneData, aes_string(x=R3)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() +
ggplot2::ggtitle(R3) + ggplot2::ylab("Density") + ggplot2::xlab("Relative Expression")
}
if(!is.na(nameList[4])) {
pp4 <- ggplot2::ggplot(GeneData, aes_string(x=R4)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() +
ggplot2::ggtitle(R4) + ggplot2::ylab("Density") + ggplot2::xlab("Relative Expression")
}
if(!is.na(nameList[5])) {
pp5 <- ggplot2::ggplot(GeneData, aes_string(x=R5)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() +
ggplot2::ggtitle(R5) + ggplot2::ylab("Density") + ggplot2::xlab("Relative Expression")
}
if(!is.na(nameList[6])) {
pp6 <- ggplot2::ggplot(GeneData, aes_string(x=R6)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() +
ggplot2::ggtitle(R6) + ggplot2::ylab("Density") + ggplot2::xlab("Relative Expression")
}
if(!is.na(nameList[7])) {
pp7 <- ggplot2::ggplot(GeneData, aes_string(x=R7)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() +
ggplot2::ggtitle(R7) + ggplot2::ylab("Density") + ggplot2::xlab("Relative Expression")
}
if(!is.na(nameList[8])) {
pp8 <- ggplot2::ggplot(GeneData, aes_string(x=R8)) + ggplot2::geom_density(adjust = 1/5, color = "navy blue") + ggplot2::theme_gray() +
ggplot2::ggtitle(R8) + ggplot2::ylab("Density") + ggplot2::xlab("Relative Expression")
}
if(length(nameList)==1){
x1 <- pp1
}
if(length(nameList)==2){
x1 <- gridExtra::grid.arrange(pp1, pp2, ncol = ncol)
}
if(length(nameList)==3){
x1 <- gridExtra::grid.arrange(pp1, pp2, pp3, ncol = ncol)
}
if(length(nameList)==4){
x1 <- gridExtra::grid.arrange(pp1, pp2, pp3, pp4, ncol = ncol)
}
if(length(nameList)==5){
x1 <- gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, ncol = ncol)
}
if(length(nameList)==6){
x1 <- gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, pp6, ncol = ncol)
}
if(length(nameList)==7){
x1 <- gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, pp6, pp7, ncol = ncol)
}
if(length(nameList)==8){
x1 <- gridExtra::grid.arrange(pp1, pp2, pp3, pp4, pp5, pp6, pp7, pp8, ncol = ncol)
}
}
previewDipDistribution <- function(RNAdata, rawRNAdata, minimumCounts, barLine){
RNAdataMat <- as.matrix(RNAdata)
if(missing(barLine)){barLine <- 0}
if(missing(rawRNAdata)){
RNAdataMat <- as.matrix(RNAdata)
} else{
RNAdataMat <- as.matrix(RNAdata)
RNAdataDF <- data.frame(RNAdataMat)
rawRNAdata$max <- 0
rawRNAdata$max <- apply(rawRNAdata, 1, max)
RNArawbelow <- rawRNAdata[which(rawRNAdata$max<minimumCounts),]
rr <- row.names(RNArawbelow)
RNAdataDF_R <- RNAdataDF[!(row.names(RNAdataDF) %in% rr),]
RNAdataMat <- as.matrix(RNAdataDF_R)
}
DipOutput <- matrix(nrow=nrow(RNAdataMat), ncol=2) #preparing a matrix to receive output in the function step
for (k in 1:nrow(RNAdataMat)){
e <- diptest::dip(RNAdataMat[k,], full.result = FALSE, min.is.0 = FALSE, debug = FALSE) #actual dip statistical test
f <- diptest::dip.test(RNAdataMat[k,], simulate.p.value = FALSE, B = 2000)
DipOutput[k,1] <- e #save output into pre-prepared matrix
DipOutput[k,2] <- f$p_value
##consider extracting the rownames off RNAdata as a vector, which you can then reimpute here
}
DipOutputDF <- data.frame(DipOutput)
colnames(DipOutputDF) <- c("Dip", "p-value")
row.names(DipOutputDF) <- row.names(RNAdataMat)
return(DipOutputDF)
x1 <- ggplot2::ggplot(DipOutputDF, aes(x=Dip)) +
ggplot2::geom_density() + ggplot2::geom_vline(xintercept=barLine, colour="#FF9999") +
ggplot2::scale_x_continuous(breaks = round(seq(min(DipOutputDF$Dip), max(DipOutputDF$Dip), by = 0.01),2)) +
ggplot2::theme(plot.title = element_text(hjust = 0.5)) + ggplot2::labs(title= "Bimodality among RNA expression patterns",x="Hartigan's Dip Score")
x2 <- ggplot2::ggplot(DipOutputDF, aes(x = p-value)) +
ggplot2::geom_density() + ggplot2::geom_vline(xintercept = barLine, colour = "#FF9999") +
ggplot2::scale_x_continuous(breaks = round(seq(min(DipOutputDF$p-value),
max(DipOutputDF$p-value), by = 0.01), 2)) + ggplot2::theme(plot.title = element_text(hjust = 0.5)) +
ggplot2::labs(title = "Bimodality among RNA expression patterns",
x = "Hartigan's Dip Score")
return(x1)
return(x2)
}
#make the adjust into an argument
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.