```{r, include = FALSE}
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```
In this article, I present how to run the real data analysis on a compute cluster. Follow these steps:
1. Create a folder named `Analysis`. Within the folder, put your raw data in the subfolder `./Data/JA`. The raw data are assumed to be paired `.txt` files, named `UNIQUEIDDoubleSound.txt` and `UNIQUEIDDoubleSound_spiketimes.txt`, storing the conditions of the experiment and the spike time records respectively.
2. Run the R code in the `Preparation` section to create a `filenames.txt` file, stored in the folder `Analysis`, listing all the paired file names.
3. Run the R code in the `Preparation` section to split the raw data into `n_part` parts (I set 3 parts here). This creates several `filenames_NUMBER.txt` files, stored in the folder `Analysis`, each listing part of the paired file names.
4. Create an R file named `prml_analysis.R` within the folder `Analysis`, following the section `R Script (Parallel Version)`.
5. Create a batch script named `prml_analysis.sh` within the folder `Analysis`, following the section `Batch Script`.
6. Create a folder `others` within the folder `Analysis` to store the output and error messages from the R script and the batch script.
7. Upload the folder `Analysis` to the compute cluster, following the section `Upload Files`.
8. Download the Singularity file to the folder `Analysis`, as shown in the section `Singularity file`. Note: once it is downloaded, the next time you want to use it you only need to copy it into your new analysis folder.
9. Run the analysis on the compute cluster by following the section `Run in Compute Cluster`. You can also check the status of your jobs there.
10. After all the jobs finish, fetch the results to your local folder and visualize them. The section `Fetch Results and Visualizations` shows how to gather the results and do basic data cleaning before visualization.
11. For some practical notes, see the section `Notes`.
12. If you want to apply `prml_tests_f()` instead of `prml_tests()`, you only need to change the R script to the one shown in the section `Another R Script Example`.
## Preparation

Make sure you have all the paired raw data files in the `./Data/JA` folder. The following code:

- creates a `filenames.txt` file, extracting all the paired file names from the folder `./Data/JA`;
- creates `filenames_NUMBER.txt` files by splitting the file names into `n_part` parts.

```r
library("dplyr")
library("purrr")
library("stringr")

# Create a `filenames.txt` file, extracting all the paired file names
# from the folder `./Data/JA`.
filenames <- list.files(path = "./Data/JA", pattern = "DoubleSound\\.txt") %>%
  str_split(., ".txt", simplify = TRUE) %>%
  .[, 1]
write(filenames, file = "filenames.txt")

# Split the file names into `n_part` parts.
# fnames -- a vector of the names of the .txt files in the folder.
fnames <- scan("filenames.txt", "a")
n_part <- 3
flist <- split(fnames, seq_along(fnames) %% n_part + 1)
map(1:length(flist), ~ write(flist[[.x]], file = paste0("filenames_", .x, ".txt")))
```
## R Script (Parallel Version)

Write an R script (named `prml_analysis.R`).

As in the `Real Data Analysis` article, I parallelize the nested for loop in `prml.from.fname` over all the possible triplets, using `n_parallel` threads. I also split `filenames.txt` into `n_part` subfiles, named `filenames_NUMBER.txt`, so that the subfiles can be processed in parallel at the compute cluster level.
Arguments for the `prml_analysis.R` script:

- `n_parallel`: the number of threads for the R-level parallelization.
- `file_number`: the index of the specific subfile to process. This drives the compute cluster level parallelization (using an sbatch array).
```r
args = commandArgs(trailingOnly = TRUE)
stopifnot(length(args) == 2)
n_parallel = as.numeric(args[1]) # coerce: command-line arguments arrive as characters
file_number = args[2]

suppressMessages(library("statmod"))
suppressMessages(library("dplyr"))
suppressMessages(library("purrr"))
suppressMessages(library("prml"))

# prml.from.tri: run prml_tests() on one triplet of conditions
prml.from.tri <- function(trials, spiketimes, frq = c(1100, 742),
                          pos = c(24, -6), on.reward = TRUE,
                          start.time = 0, end.time = 600,
                          match.level = FALSE, AB.eqlevel = FALSE,
                          go.by.soff = FALSE, remove.zeros = FALSE, ...) {
  attach(trials)
  attach(spiketimes)
  timestamps <- split(TIMES, TRIAL2)
  ntrials <- length(timestamps)
  trial.id <- as.numeric(names(timestamps))
  ix1 <- TASKID == 8 & A_FREQ == frq[1] & XA == pos[1]
  ix2 <- TASKID == 8 & A_FREQ == frq[2] & XA == pos[2]
  # parentheses group the two duplex arms under TASKID == 12
  ix3 <- TASKID == 12 &
    ((A_FREQ == frq[1] & B_FREQ == frq[2] & XA == pos[1] & XB == pos[2]) |
       (A_FREQ == frq[2] & B_FREQ == frq[1] & XA == pos[2] & XB == pos[1]))
  if (on.reward) {
    ix1 <- ix1 & REWARD == 1
    ix2 <- ix2 & REWARD == 1
    ix3 <- ix3 & REWARD == 1
  }
  blev <- sort(unique(B_LEVEL[ix3]))
  targ.lev <- blev[blev > 0]
  lev <- "*"
  if (match.level) {
    if (length(targ.lev) > 1) {
      targ.lev <- max(targ.lev)
      warning("Multiple single sound levels, choosing largest one")
    }
    ix1 <- ix1 & A_LEVEL == targ.lev
    ix2 <- ix2 & A_LEVEL == targ.lev
    lev <- as.character(targ.lev)
  }
  if (AB.eqlevel)
    ix3 <- ix3 & (A_LEVEL == B_LEVEL)
  sing1 <- trials[ix1, 1]
  sing2 <- trials[ix2, 1]
  duplx <- trials[ix3, 1]
  success <- REWARD[ix3]
  if (go.by.soff)
    end.time <- min(SOFF[ix1 | ix2 | ix3])
  spike.counter <- function(jj) {
    jj1 <- match(jj, trial.id)
    spks <- timestamps[[jj1]]
    return(sum(spks > start.time & spks < end.time))
  }
  Acounts <- sapply(sing1, spike.counter)
  Bcounts <- sapply(sing2, spike.counter)
  ABcounts <- sapply(duplx, spike.counter)
  detach(trials)
  detach(spiketimes)
  if ((mean(Acounts) == 0) | (mean(Bcounts) == 0) | (mean(ABcounts) == 0)) {
    stop("All the spike counts are 0 under a specific condition.")
  }
  if ((length(Acounts) == 1) | (length(Bcounts) == 1) | (length(ABcounts) == 1)) {
    stop("Only 1 available trial under a specific condition.")
  }
  s1 <- paste(frq[1], "Hz ", pos[1], "deg ", lev, "Db ", sep = "")
  s2 <- paste(frq[2], "Hz ", pos[2], "deg ", lev, "Db ", sep = "")
  dp <- paste("Duplex: ", lev, "Db ", sep = "")
  res = prml_tests(
    xA = Acounts, xB = Bcounts, xAB = ABcounts,
    labels = c(s1, s2, dp), e = 0, remove.zeros = remove.zeros
  )
  # By default:
  # remove.zeros = FALSE
  # mu_l = "min", mu_u = "max", gamma.pars = c(0.5, 2e-10),
  # n_gq = 20, n_per = 100, alpha = 0.5
  return(res)
}

# prml.from.fname: run prml.from.tri() on all triplets of one recording,
# in parallel over triplets
prml.from.fname <- function(fname, data.path = "Data", on.reward = TRUE,
                            match.level = FALSE, AB.eqlevel = FALSE,
                            outfile = "", start = 0, end = 600,
                            remove.zeros = FALSE, mccores = 4) {
  infile1 <- paste(data.path, "/JA/", fname, ".txt", sep = "")
  trials <- read.table(
    infile1,
    col.names = c("TRIAL", "TASKID", "A_FREQ", "B_FREQ", "XA",
                  "XB", "REWARD", "A_LEVEL", "B_LEVEL")
  )
  infile2 <- paste(data.path, "/JA/", fname, "_spiketimes.txt", sep = "")
  spiketimes <- read.table(infile2, col.names = c("TRIAL2", "TIMES"))
  FREQS <- unique(c(trials$A_FREQ, trials$B_FREQ))
  alt.freq <- sort(FREQS[FREQS > 0 & FREQS != 742])
  alt.pos <- c(-24, -6, 6, 24)
  df <- expand.grid(alt.freq, alt.pos)
  res <- parallel::mclapply(1:nrow(df), function(x) {
    try({
      lbf <- prml.from.tri(
        trials, spiketimes,
        c(df[x, 1], 742), c(df[x, 2], -144 / df[x, 2]),
        on.reward, start, end, match.level, AB.eqlevel,
        go.by.soff = FALSE, remove.zeros = remove.zeros
      ) %>% unlist(.)
      c(df[x, 1], df[x, 2], lbf)
    })
  }, mc.cores = mccores)
  lapply(res, function(x) {
    cat(fname, x, "\n", file = outfile, append = TRUE)
  })
}

set.seed(123)
fnames <- scan(paste0("filenames_", file_number, ".txt"), "a")

# Loop across all the files in this subfile.
start = Sys.time()
for (file.name in fnames) {
  # "Data" is the folder where you store all the .txt files;
  # "result_FILE_NUMBER.txt" is the file where the results are appended.
  try(prml.from.fname(
    file.name,
    data.path = "Data", on.reward = TRUE, match.level = FALSE,
    AB.eqlevel = FALSE, outfile = paste0("result_", file_number, ".txt"),
    start = 0, end = 600, remove.zeros = FALSE, mccores = n_parallel
  ))
}
end = Sys.time()
cat("Run time: ", difftime(end, start, units = "secs"), "s\n", sep = "")
```
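Before submitting to the cluster, a quick local smoke test can save a wasted job. A minimal sketch (my own addition, assuming the functions above are already loaded and `filenames_1.txt` exists): process one recording serially and check that `result_test.txt` appears.

```r
# Local sanity check: run a single recording with one core
# before launching the full array job.
fnames <- scan("filenames_1.txt", "a")
try(prml.from.fname(
  fnames[1],
  data.path = "Data",
  outfile = "result_test.txt",
  mccores = 1
))
```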
## Batch Script

Create a `.sh` file (named `prml_analysis.sh`).

If you use bash commands to create the file, use `vim` to create and edit it; press `esc` to stop editing, then press `shift+zz` to save and exit the file.

```{bash,eval=FALSE,echo=TRUE}
cd PATH_TO_ANALYSIS
vim prml_analysis.sh
```

If you use RStudio to create the file: File -> New File -> Text File -> Save -> change the name to `prml_analysis.sh`.

Copy the following into the `.sh` file. Note:

- `-c8` should match the `n_parallel` argument of the R script, which is the first number after the `.R` file name.
- `array=1-3` should match `n_part` in the `Preparation` section. It should be the number of subfiles you have.

```{bash,eval=FALSE,echo=TRUE}
#!/bin/bash
#SBATCH --partition=common
#SBATCH --output=others/filenames_%a.out
#SBATCH --error=others/filenames_%a.err
#SBATCH -c8
#SBATCH --array=1-3
singularity run -B $(pwd):/work prml.simg prml_analysis.R 8 $SLURM_ARRAY_TASK_ID
```
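A quick consistency check you can run in R from the `Analysis` folder before submitting (my own addition): the number of `filenames_NUMBER.txt` subfiles should equal the upper bound of `--array`.

```r
# Count the subfiles; this should print 3 to match --array=1-3.
length(list.files(pattern = "^filenames_[0-9]+\\.txt$"))
```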
## Upload Files

1. Open a new terminal (terminal 1).
   - Login to the compute cluster.
   - Create a folder named `Analysis`.

```{bash,eval=FALSE,echo=TRUE}
ssh ACCOUNT@dcc-slogin.oit.duke.edu
mkdir Analysis
```

2. Open another terminal (terminal 2).
   - Copy the path to the local folder `Analysis`.
   - Go to the folder `Analysis`.
   - Copy all the files within the local folder `Analysis` to the folder `Analysis` on the compute cluster.

```{bash,eval=FALSE,echo=TRUE}
cd PATH_TO_ANALYSIS
rsync -av * ACCOUNT@dcc-slogin.oit.duke.edu:./Analysis
```

3. In terminal 1, go to the folder `Analysis` to see the list of files.

```{bash,eval=FALSE,echo=TRUE}
cd Analysis/
ls
```
## Singularity file

I built a Docker image providing the environment to run the `prml_analysis.R` script. For the details of the Singularity file, please refer to the article [Singularity Environment Building](https://yunranchen.github.io/prml/articles/singularity.html). Here you just need to download it from Docker Hub.

- Login to the compute cluster.
- Go to the folder named `Analysis`.
- Download the Singularity image. Copy the following line and run it on the compute cluster.

```{bash,eval=FALSE,echo=TRUE}
singularity pull docker://yunran/prml
```
## Run in Compute Cluster

Preparation: make sure your `Analysis` folder contains `prml_analysis.R`, `prml_analysis.sh`, `prml.simg`, the `filenames_NUMBER.txt` files, and the folders `others` and `Data`.

- Submit the job array:

```{bash,eval=FALSE,echo=TRUE}
sbatch prml_analysis.sh
```

- Check the status:

```{bash,eval=FALSE,echo=TRUE}
squeue -u ACCOUNT
```
## Fetch Results and Visualizations

Assume you create a local folder named `Results` to store all the results of the PRML analysis.

- Go to the folder `Results`.

```{bash,eval=FALSE,echo=TRUE}
cd PATH_TO_RESULTS
```

- Fetch the results from the compute cluster back to your local folder `Results`.

```{bash,eval=FALSE,echo=TRUE}
rsync -av ACCOUNT@dcc-slogin.oit.duke.edu:./Analysis/result_*.txt PATH_TO_RESULTS
```

- Check the fetched files in `Results`.

```{bash,eval=FALSE,echo=TRUE}
ls
```
- R code to clean the data and store it into a `.RData` file:

```r
library("tidyverse")

filelist = list.files(pattern = ".*.txt")
raw = map_df(filelist, function(x) {
  read.table(
    x,
    header = FALSE, sep = " ",
    stringsAsFactors = FALSE, fill = TRUE
  ) %>%
    filter(!V2 %in% c("Error", ""))
}) %>%
  mutate(e = 0) %>% # plug in the value of e you set in prml_tests()
  select(-V15)
names(raw) = c(
  "CellId", "AltFreq", "AltPos", "SepBF",
  "PrMix", "PrInt", "PrOut", "PrSing", "WinModels",
  "Pval1", "Pval2", "SampSizeA", "SampSizeB", "SampSizeAB", "e"
)
raw = raw %>% mutate(
  SepBF = SepBF %>% as.numeric(),
  PrMix = PrMix %>% as.numeric(),
  PrInt = PrInt %>% as.numeric(),
  PrOut = PrOut %>% as.numeric(),
  PrSing = PrSing %>% as.numeric(),
  WinModels = WinModels %>% as.numeric(),
  Pval1 = Pval1 %>% as.numeric(),
  Pval2 = Pval2 %>% as.numeric(),
  SampSizeA = SampSizeA %>% as.numeric(),
  SampSizeB = SampSizeB %>% as.numeric(),
  SampSizeAB = SampSizeAB %>% as.numeric()
)
model.names = c("Mixture", "Intermediate", "Outside", "Single")
raw = raw %>% mutate(cid = paste(CellId, AltFreq, AltPos, sep = "."))
# filter out data that has errors
cids = raw %>% filter(!WinModels %in% c(1:4)) %>% pull(cid)
rawf = raw %>% filter(!cid %in% cids) %>%
  mutate(WinModel = model.names[WinModels],
         WinPr = pmax(PrMix, PrInt, PrOut, PrSing))
# triplets not filtered by the PRML filter
tri_unfilter = rawf %>%
  filter(SampSizeA >= 5, SampSizeB >= 5, SampSizeAB >= 5,
         SepBF >= 3) %>% # well-separated and enough sample size
  mutate(WinProb = cut(WinPr, breaks = c(0, 0.25, 0.5, 0.95, 1.01),
                       include.lowest = TRUE)) # cut the winning probabilities into four categories
# triplets filtered by the PRML filter
tri_filter = tri_unfilter %>%
  mutate(invPval1 = 1 / Pval1, invPval2 = 1 / Pval2) %>%
  filter(invPval1 <= 20, invPval2 <= 20) %>% # filter by the PRML filter
  dplyr::select(-invPval1, -invPval2)
tri = rbind(tri_unfilter, tri_filter) %>%
  mutate(group = rep(c("unfiltered", "filtered"),
                     c(nrow(tri_unfilter), nrow(tri_filter)))) %>%
  mutate(WinModel = factor(WinModel,
                           levels = c("Outside", "Single", "Intermediate", "Mixture")))
# save as a .RData
save(tri, file = "tri_filter_unfilter.RData")
```
library("ggplot2") ggplot(data = tri,mapping = aes(x = WinModel,color=WinProb,fill=WinProb))+ geom_bar(stat="count",position = "stack")+ facet_grid(~group)+ geom_text(stat='count', aes(label=..count..), position = position_stack(vjust = 0.5),size=3,color="black")+ scale_x_discrete(drop=FALSE) + scale_colour_grey(start = 0.9,end=0.3,drop=FALSE) + #if you use grey color scale_fill_grey(start = 0.9,end=0.3,drop=FALSE) + theme_bw() ggsave(filename = "plot.pdf",width = 8,height = 4)
## Notes

- Match `n_part` in the `Preparation` section with `array=` in the `Batch Script` section; match `-c` in the `Batch Script` section with the first argument passed to the R script.
- The R script appends to the existing result files, so before rerunning, remove the old results (using `rm` in bash). Or, add the day you run the code before all the files' names.
- Check the `.out` files in the `others` folder to see the elapsed time. (For example, 4218.462s for `prml_tests()` through 40 files in JA.)
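If you want to collect those timings programmatically, here is a minimal sketch (my own addition; run from the `Analysis` folder after the jobs finish) that pulls the "Run time:" lines the R script prints from the `.out` files:

```r
# Gather the "Run time:" line from each Slurm output file in others/.
outs <- list.files("others", pattern = "\\.out$", full.names = TRUE)
for (f in outs) {
  cat(f, ":", grep("^Run time:", readLines(f), value = TRUE), "\n")
}
```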
## Another R Script Example

This R script is for applying `prml_tests_f()`.
```r
args = commandArgs(trailingOnly = TRUE)
stopifnot(length(args) == 2)
n_parallel = as.numeric(args[1]) # coerce: command-line arguments arrive as characters
file_number = args[2]

suppressMessages(library("statmod"))
suppressMessages(library("dplyr"))
suppressMessages(library("purrr"))
suppressMessages(library("prml"))

# prml.from.tri.f: run prml_tests_f() on one triplet of conditions
prml.from.tri.f <- function(trials, spiketimes, frq = c(1100, 742),
                            pos = c(24, -6), on.reward = TRUE,
                            start.time = 0, end.time = 600,
                            match.level = FALSE, AB.eqlevel = FALSE,
                            go.by.soff = FALSE, remove.zeros = FALSE, ...) {
  attach(trials)
  attach(spiketimes)
  timestamps <- split(TIMES, TRIAL2)
  ntrials <- length(timestamps)
  trial.id <- as.numeric(names(timestamps)) ## same as unique(TRIAL2)
  ix1 <- TASKID == 8 & A_FREQ == frq[1] & XA == pos[1]
  ix2 <- TASKID == 8 & A_FREQ == frq[2] & XA == pos[2]
  # parentheses group the two duplex arms under TASKID == 12
  ix3 <- TASKID == 12 &
    ((A_FREQ == frq[1] & B_FREQ == frq[2] & XA == pos[1] & XB == pos[2]) |
       (A_FREQ == frq[2] & B_FREQ == frq[1] & XA == pos[2] & XB == pos[1]))
  if (on.reward) {
    ix1 <- ix1 & REWARD == 1
    ix2 <- ix2 & REWARD == 1
    ix3 <- ix3 & REWARD == 1
  }
  blev <- sort(unique(B_LEVEL[ix3]))
  targ.lev <- blev[blev > 0]
  lev <- "*"
  if (match.level) {
    if (length(targ.lev) > 1) {
      targ.lev <- max(targ.lev)
      warning("Multiple single sound levels, choosing largest one")
    }
    ix1 <- ix1 & A_LEVEL == targ.lev
    ix2 <- ix2 & A_LEVEL == targ.lev
    lev <- as.character(targ.lev)
  }
  if (AB.eqlevel)
    ix3 <- ix3 & (A_LEVEL == B_LEVEL)
  sing1 <- trials[ix1, 1]
  sing2 <- trials[ix2, 1]
  duplx <- trials[ix3, 1]
  success <- REWARD[ix3]
  if (go.by.soff)
    end.time <- min(SOFF[ix1 | ix2 | ix3])
  spike.counter <- function(jj) {
    jj1 <- match(jj, trial.id)
    spks <- timestamps[[jj1]]
    return(sum(spks > start.time & spks < end.time))
  }
  Acounts <- sapply(sing1, spike.counter)
  Bcounts <- sapply(sing2, spike.counter)
  ABcounts <- sapply(duplx, spike.counter)
  detach(trials)
  detach(spiketimes)
  if ((mean(Acounts) == 0) | (mean(Bcounts) == 0) | (mean(ABcounts) == 0)) {
    stop("All the spike counts are 0 under a specific condition.")
  }
  if ((length(Acounts) == 1) | (length(Bcounts) == 1) | (length(ABcounts) == 1)) {
    stop("Only 1 available trial under a specific condition.")
  }
  s1 <- paste(frq[1], "Hz ", pos[1], "deg ", lev, "Db ", sep = "")
  s2 <- paste(frq[2], "Hz ", pos[2], "deg ", lev, "Db ", sep = "")
  dp <- paste("Duplex: ", lev, "Db ", sep = "")
  res = prml_tests_f(
    xA = Acounts, xB = Bcounts, xAB = ABcounts,
    labels = c(s1, s2, dp), e = 0, remove.zeros = remove.zeros
  )
  # By default:
  # remove.zeros = FALSE
  # mu_l = "min", mu_u = "max", gamma.pars = c(0.5, 2e-10),
  # n_gq = 20, n_mu = 100, n_per = 100, alpha = 0.5
  return(res)
}

# prml.from.fname.f: run prml.from.tri.f() on all triplets of one recording
prml.from.fname.f <- function(fname, data.path = "Data", on.reward = TRUE,
                              match.level = FALSE, AB.eqlevel = FALSE,
                              outfile = "", start = 0, end = 600,
                              remove.zeros = FALSE, mccores = 4) {
  infile1 <- paste(data.path, "/JA/", fname, ".txt", sep = "")
  trials <- read.table(
    infile1,
    col.names = c("TRIAL", "TASKID", "A_FREQ", "B_FREQ", "XA",
                  "XB", "REWARD", "A_LEVEL", "B_LEVEL")
  )
  infile2 <- paste(data.path, "/JA/", fname, "_spiketimes.txt", sep = "")
  spiketimes <- read.table(infile2, col.names = c("TRIAL2", "TIMES"))
  FREQS <- unique(c(trials$A_FREQ, trials$B_FREQ))
  alt.freq <- sort(FREQS[FREQS > 0 & FREQS != 742])
  alt.pos <- c(-24, -6, 6, 24)
  df <- expand.grid(alt.freq, alt.pos)
  res <- parallel::mclapply(1:nrow(df), function(x) {
    try({
      lbf <- prml.from.tri.f(
        trials, spiketimes,
        c(df[x, 1], 742), c(df[x, 2], -144 / df[x, 2]),
        on.reward, start, end, match.level, AB.eqlevel,
        go.by.soff = FALSE, remove.zeros = remove.zeros
      )
      cla <- c(df[x, 1], df[x, 2], unlist(lbf$out1))
      mix <- c(df[x, 1], df[x, 2], unlist(lbf$out2$mix_pf))
      int <- c(df[x, 1], df[x, 2], unlist(lbf$out2$int_pf))
      outA <- c(df[x, 1], df[x, 2], unlist(lbf$out2$outA_pf))
      outB <- c(df[x, 1], df[x, 2], unlist(lbf$out2$outB_pf))
      list(cla, mix, int, outA, outB)
    })
  }, mc.cores = mccores)
  lapply(res, function(x) {
    try({
      cat(fname, x[[1]], "\n", file = outfile, append = TRUE)
      cat("mix", fname, x[[2]], "\n",
          file = paste0("mix_", outfile), append = TRUE)
      cat("int", fname, x[[3]], "\n",
          file = paste0("int_out_", outfile), append = TRUE)
      cat("outA", fname, x[[4]], "\n",
          file = paste0("int_out_", outfile), append = TRUE)
      cat("outB", fname, x[[5]], "\n",
          file = paste0("int_out_", outfile), append = TRUE)
    })
  })
}

set.seed(123)
fnames <- scan(paste0("filenames_", file_number, ".txt"), "a")

start <- Sys.time()
for (file.name in fnames) {
  # "Data" is the folder where you store all the .txt files;
  # results are appended to result_FILE_NUMBER.txt, plus the mix_ and
  # int_out_ companion files written inside prml.from.fname.f().
  try(prml.from.fname.f(
    file.name,
    data.path = "Data", on.reward = TRUE, match.level = FALSE,
    AB.eqlevel = FALSE, outfile = paste0("result_", file_number, ".txt"),
    start = 0, end = 600, remove.zeros = FALSE, mccores = n_parallel
  ))
}
end <- Sys.time()
cat("Run time: ", difftime(end, start, units = "secs"), "s\n", sep = "")
```
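Each subfile now produces three kinds of result files, following the `cat()` calls above. A quick existence check (my own addition; subfile 1 used as an example):

```r
# After the array job for subfile 1 finishes, all three files should exist.
file.exists(c("result_1.txt", "mix_result_1.txt", "int_out_result_1.txt"))
```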
In your local folder `Results`, fetch the results from the compute cluster (including the `mix_` and `int_out_` files).

```{bash,eval=FALSE,echo=TRUE}
rsync -av ACCOUNT@dcc-slogin.oit.duke.edu:./Analysis/*result_*.txt PATH_TO_RESULTS
```
Data cleaning and visualization.

- Obtain the file lists for all the results.

```r
library("tidyverse")

filelist_cla = list.files(pattern = "^result_*")
filelist_mix = list.files(pattern = "mix_result_*")
filelist_intout = list.files(pattern = "int_out_result_*")
```
- Clean the classification results and store them into a `.RData` file.

```r
raw = map_dfr(filelist_cla, function(x) {
  read.table(
    x,
    header = FALSE, sep = " ",
    stringsAsFactors = FALSE, fill = TRUE
  ) %>%
    filter(!V2 %in% c("Error", ""))
}) %>%
  mutate(e = 0) %>% # plug in the value of e you set in prml_tests()
  select(-V15)
names(raw) = c(
  "CellId", "AltFreq", "AltPos", "SepBF",
  "PrMix", "PrInt", "PrOut", "PrSing", "WinModels",
  "Pval1", "Pval2", "SampSizeA", "SampSizeB", "SampSizeAB", "e"
)
raw = raw %>% mutate(
  SepBF = SepBF %>% as.numeric(),
  PrMix = PrMix %>% as.numeric(),
  PrInt = PrInt %>% as.numeric(),
  PrOut = PrOut %>% as.numeric(),
  PrSing = PrSing %>% as.numeric(),
  WinModels = WinModels %>% as.numeric(),
  Pval1 = Pval1 %>% as.numeric(),
  Pval2 = Pval2 %>% as.numeric(),
  SampSizeA = SampSizeA %>% as.numeric(),
  SampSizeB = SampSizeB %>% as.numeric(),
  SampSizeAB = SampSizeAB %>% as.numeric()
)
model.names = c("Mixture", "Intermediate", "Outside", "Single")
raw = raw %>% mutate(cid = paste(CellId, AltFreq, AltPos, sep = "."))
# filter out data that has errors
cids = raw %>% filter(!WinModels %in% c(1:4)) %>% pull(cid)
rawf = raw %>% filter(!cid %in% cids) %>%
  mutate(WinModel = model.names[WinModels],
         WinPr = pmax(PrMix, PrInt, PrOut, PrSing))
# triplets not filtered by the PRML filter
tri_unfilter = rawf %>%
  filter(SampSizeA >= 5, SampSizeB >= 5, SampSizeAB >= 5,
         SepBF >= 3) %>% # well-separated and enough sample size
  mutate(WinProb = cut(WinPr, breaks = c(0, 0.25, 0.5, 0.95, 1.01),
                       include.lowest = TRUE)) # cut the winning probabilities into four categories
# triplets filtered by the PRML filter
tri_filter = tri_unfilter %>%
  mutate(invPval1 = 1 / Pval1, invPval2 = 1 / Pval2) %>%
  filter(invPval1 <= 20, invPval2 <= 20) %>% # filter by the PRML filter
  dplyr::select(-invPval1, -invPval2)
tri = rbind(tri_unfilter, tri_filter) %>%
  mutate(group = rep(c("unfiltered", "filtered"),
                     c(nrow(tri_unfilter), nrow(tri_filter)))) %>%
  mutate(WinModel = factor(WinModel,
                           levels = c("Outside", "Single", "Intermediate", "Mixture")))
# save as a .RData
save(tri, file = "tri_filter_unfilter.RData")
```
library("ggplot2") ggplot(data = tri,mapping = aes(x = WinModel,color=WinProb,fill=WinProb))+ geom_bar(stat="count",position = "stack")+ facet_grid(~group)+ geom_text(stat='count', aes(label=..count..), position = position_stack(vjust = 0.5),size=3,color="black")+ scale_x_discrete(drop=FALSE) + scale_colour_grey(start = 0.9,end=0.3,drop=FALSE) + #if you use grey color scale_fill_grey(start = 0.9,end=0.3,drop=FALSE) + theme_bw() ggsave(filename = "plot.pdf",width = 8,height = 4)
- Read in the `int_out_` and `mix_` results.

```r
e_ = 0 # the value of e
filelist_intout = list.files(pattern = "int_out_result_*")
int = map_dfr(filelist_intout, function(x) {
  read.table(
    x,
    header = FALSE, sep = " ",
    stringsAsFactors = FALSE, fill = TRUE
  )
}) %>%
  mutate(e = e_) %>%
  mutate(cid = paste(V2, V3, V4, sep = "."))
mix = map_dfr(filelist_mix, function(x) {
  read.table(
    x,
    header = FALSE, sep = " ",
    stringsAsFactors = FALSE, fill = TRUE
  )
}) %>%
  # rescale the mixture weights from [0, 1] to [e, 1 - e]
  mutate(e = e_, V5 = e_ + (1 - 2 * e_) * V5, V6 = e_ + (1 - 2 * e_) * V6) %>%
  mutate(cid = paste(V2, V3, V4, sep = "."))
```
- Draw the curves for the filtered triplets.

```r
# Here I draw the filtered triplets.
dfres = tri %>% filter(group == "filtered")
ids = dfres %>% pull(cid)
pdf(height = 3, width = 5, file = "plot_pdf.pdf")
for (i in ids) {
  try({
    int0 = int %>% filter(cid == i, V1 == "int") %>% .[, 5:104] %>% t()
    df1 = data.frame(ind = seq(e_, 1 - e_, length.out = 100), y = int0,
                     e = rep(e_, 100), model = rep("int", 100))
    mix0 = mix %>% filter(cid == i) %>% .[, c(5, 6)] %>% t()
    df2 = data.frame(ind = c(0, 1), y = mix0, e = rep(0, 2))
    p = ggplot(data = df1, mapping = aes(x = ind, y = y)) +
      geom_line() +
      # facet_grid(. ~ e) +
      geom_point(data = df2, mapping = aes(x = ind, y = y)) +
      geom_hline(yintercept = e_, linetype = "dashed") +
      geom_hline(yintercept = 1 - e_, linetype = "dashed") +
      geom_vline(xintercept = e_, linetype = "dashed") +
      geom_vline(xintercept = 1 - e_, linetype = "dashed") +
      ggtitle(paste0(i, ";"),
              paste0(dfres %>% filter(cid == i) %>% pull(WinModel),
                     ";WinProb:",
                     dfres %>% filter(cid == i) %>% pull(WinPr) %>% round(., 4),
                     ";SizeA:", dfres %>% filter(cid == i) %>% pull(SampSizeA),
                     ";SizeB:", dfres %>% filter(cid == i) %>% pull(SampSizeB),
                     ";SizeAB:", dfres %>% filter(cid == i) %>% pull(SampSizeAB))) +
      theme_bw() +
      xlab("")
    print(p)
  })
}
dev.off()
```