knitr::opts_chunk$set(echo = TRUE)
```r
library(tidyverse)
library(dcphelper)

new_path_base <- c("/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/")
inbox_path_base <- c("/home/ubuntu/bucket/inbox/000012049003__2019-01-28T21_02_07-Measurement_2/Images/")

new_json_path_flat <- "/home/ubuntu/bucket/metadata/job_flatfield_template.json"
new_json_path_max  <- "/home/ubuntu/bucket/metadata/job_maxproj_template.json"
new_json_path_seg  <- "/home/ubuntu/bucket/metadata/job_segmentation_template.json"

## Creating target dir
dir.create(new_path_base) # Do not execute this from a local machine if you expect other AWS services to access the directory later on

## Name channels
channel_v <- c("ch1", "ch2", "ch3", "ch4")
channel_n <- c("pc", "bf", "ce", "tm")
```
```r
for(j in 1:length(inbox_path_base)){
  metadata_split_path <- create_flatfield_metadata_split(
    path = inbox_path_base[j],
    channel_of_interest = channel_v[1], # phase contrast (ch1)
    name = "pc",
    json_path = new_json_path, # not needed
    path_base = new_path_base,
    force = FALSE,
    include_brightfield_proj = TRUE,
    include_additional_proj = TRUE)
}

for(i in 3:length(channel_n)){
  for(j in 1:length(inbox_path_base)){
    metadata_split_path <- create_flatfield_metadata_split(
      path = inbox_path_base[j],
      channel_of_interest = channel_v[i], # ce / tm
      name = channel_n[i],
      json_path = new_json_path, # not needed
      path_base = new_path_base,
      force = FALSE)
  }
}
```
```{bash, eval = TRUE}
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc"
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf"
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "ce"
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "tm"
python ~/bucket/metadata/ManualMetadata_dir.py #/home/ubuntu/bucket/metadata/000012048903__2019-02-07T20_15_54-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc"
python ~/bucket/metadata/ManualMetadata_dir.py #/home/ubuntu/bucket/metadata/000012048903__2019-02-07T20_15_54-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf"
python ~/bucket/metadata/ManualMetadata_dir.py #/home/ubuntu/bucket/metadata/000012048903__2019-02-07T20_15_54-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "ce"
python ~/bucket/metadata/ManualMetadata_dir.py #/home/ubuntu/bucket/metadata/000012048903__2019-02-07T20_15_54-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "tm"
```

I also have to re-run parts of the grouping:

```bash
python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf_O"
python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf_P"
python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc_O"
python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc_P"
```
```r
# pc metadata -> segmentation template
for(j in new_path_base){
  link_json_metadata(
    metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
      stringr::str_subset(pattern = ".csv") %>%
      stringr::str_subset(pattern = "pc"),
    json_path = new_json_path_seg,
    path_base = j)
}

# bf metadata -> flatfield template
for(j in new_path_base){
  link_json_metadata(
    metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
      stringr::str_subset(pattern = ".csv") %>%
      stringr::str_subset(pattern = "bf"),
    json_path = new_json_path_flat,
    path_base = j)
}

# ce / tm metadata -> max-projection template
channel_n_mod <- channel_n[3:4]

for(j in new_path_base){
  for(i in 1:length(channel_n_mod)){
    link_json_metadata(
      metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
        stringr::str_subset(pattern = ".csv") %>%
        stringr::str_subset(pattern = channel_n_mod[i]),
      json_path = new_json_path_max,
      path_base = j)
  }
}
```
I am encountering an error with the phase contrast grouping at position O10. I am rerunning these manually.
```r
for(j in new_path_base){
  tmp <- link_json_metadata(
    metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
      stringr::str_subset(pattern = ".csv") %>%
      stringr::str_subset(pattern = "pc_O1"),
    json_path = new_json_path_seg,
    path_base = j)
}

for(j in new_path_base){
  tmp <- link_json_metadata(
    metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
      stringr::str_subset(pattern = ".csv") %>%
      stringr::str_subset(pattern = "pc_O2"),
    json_path = new_json_path_seg,
    path_base = j)
}

for(j in new_path_base){
  tmp <- link_json_metadata(
    metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
      stringr::str_subset(pattern = ".csv") %>%
      stringr::str_subset(pattern = "pc_P"),
    json_path = new_json_path_seg,
    path_base = j)
}
```
```r
for(j in new_path_base){
  for(i in 1:length(channel_n)){
    group_jobs_bash(
      path_base = j,
      name = channel_n[i],
      letter_row_interval = c(1:16),  # rows A-P
      number_col_interval = c(1:24))  # columns 1-24 (full 384-well plate)
  }
}
```
I generate the following whole-plate job files.
[1] "Wrote: /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//quick_group_jobs_bash_pc_2019-02-27.sh" [1] "Wrote: /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//quick_group_jobs_bash_bf_2019-02-27.sh" [1] "Wrote: /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//quick_group_jobs_bash_ce_2019-02-27.sh" [1] "Wrote: /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//quick_group_jobs_bash_tm_2019-02-27.sh"
I bump the fleet size up to 50 instances, since this is going to be a lot of data to process.
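For the record, a minimal sketch of how the capacity could be set from R, assuming the fleet is described by an AWS spot fleet request JSON; the file path below is hypothetical, and in practice the field may sit inside a `SpotFleetRequestConfig` block:

```r
library(jsonlite)

# Hypothetical path to the spot fleet request JSON used for the DCP workers.
fleet_path <- "/home/ubuntu/bucket/metadata/dcp_spot_fleet.json"

fleet <- fromJSON(fleet_path, simplifyVector = FALSE)
fleet$TargetCapacity <- 50  # request 50 spot instances
write_json(fleet, fleet_path, auto_unbox = TRUE, pretty = TRUE)
```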
First run the brightfield and max-projection jobs, then the phase-contrast-based segmentation.
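A minimal sketch of that order, using the whole-plate job scripts written above; note that these scripts only submit work, so the segmentation script should only be run once the flatfield and max-projection jobs have actually finished:

```r
job_dir <- "/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/"

# Submit the brightfield flatfield and max-projection (ce, tm) jobs first.
for (name in c("bf", "ce", "tm")) {
  system(paste0("bash ", job_dir, "quick_group_jobs_bash_", name, "_2019-02-27.sh"))
}

# Only after those runs have completed: submit the phase-contrast segmentation jobs.
system(paste0("bash ", job_dir, "quick_group_jobs_bash_pc_2019-02-27.sh"))
```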
It is important to run all the jobs first, before renaming files for Deep Learning. Note that the code below has not yet been adjusted to the plate set at hand.
Because of errors in the job files, the pipeline results were all dumped in the wrong flatfield directory. I am now manually transferring the files.
```bash
aws s3 mv --recursive --exclude "*" --include "000012049003__2019-01-28T21_02_07-Measurement_2*" s3://ascstore/flatfield/000012070903_2019-01-10T20_04_27-Measurement_3/ s3://ascstore/flatfield/000012049003__2019-01-28T21_02_07-Measurement_2/
```
I keep getting errors because of an underscore vs. hyphen issue.

The pipeline first did not find the files because it was looking for hyphen-delimited ("-") headers in the data, so I changed the metadata to look for underscores. Now I get the following error.
For now, I think I will revert the metadata and test a single directory manually.
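Before reverting, a quick sanity check on which delimiter the metadata actually contains is to inspect one of the per-well files; a sketch, with the path mirroring the `--data-file` used in the manual call below:

```r
library(tidyverse)

# Read one per-well metadata file and check whether the parent field
# uses hyphens, underscores, or both.
meta <- read_csv(paste0(
  "/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/",
  "metadata_000012049003__2019-01-28T21_02_07-Measurement_2_pc_A01.csv"))

meta %>%
  distinct(Metadata_parent) %>%
  mutate(has_hyphen     = str_detect(Metadata_parent, "-"),
         has_underscore = str_detect(Metadata_parent, "_"))
```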
I call CellProfiler locally:
```bash
cellprofiler -c -r -p /home/ubuntu/bucket/rapid/cellprofiler/pipelines/dig_pc_segment_expand_root_measure.cppipe -i /home/ubuntu/bucket/blank -o /home/ubuntu/local_output/000012049003__2019-01-28T21_02_07-Measurement_2-sk1-A01-f01-ch1 -d /home/ubuntu/local_output/000012049003__2019-01-28T21_02_07-Measurement_2-sk1-A01-f01-ch1/cp.is.done --data-file=/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//metadata_000012049003__2019-01-28T21_02_07-Measurement_2_pc_A01.csv -g Metadata_parent=000012049003__2019-01-28T21_02_07-Measurement_2,Metadata_timepoint=sk1,Metadata_well=A01,Metadata_fld=f01,Metadata_channel=ch1 --log-level=10
```
I still get the same error.
I call the pipeline on another directory of images:
```bash
cellprofiler -c -r -p /home/ubuntu/bucket/rapid/cellprofiler/pipelines/dig_pc_segment_expand_root_measure.cppipe -i /home/ubuntu/bucket/blank -o /home/ubuntu/local_output/000012070903_2019-01-10T20_04_27-Measurement_3-sk1-A01-f01-ch1 -d /home/ubuntu/local_output/000012070903_2019-01-10T20_04_27-Measurement_3-sk1-A01-f01-ch1/cp.is.done --data-file=/home/ubuntu/bucket/metadata/000012070903_2019-01-10T20_04_27-Measurement_3//metadata_000012070903_2019-01-10T20_04_27-Measurement_3_pc_A01.csv -g Metadata_parent=000012070903_2019-01-10T20_04_27-Measurement_3,Metadata_timepoint=sk1,Metadata_well=A01,Metadata_fld=f01,Metadata_channel=ch1 --log-level=10
```
I get the same error - the pipeline seems to be corrupt!
It is not the pipeline but the job file that is not okay: for some reason the path for the measurement of ch1 is written into a ch3 directory. That breaks the pipeline.
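A hedged sketch of how one might scan for this kind of mismatch, assuming the generated pc job script contains one cellprofiler call per group shaped like the manual call above, with the channel appearing both in the output path (`-o ...-chN`) and in the grouping string (`Metadata_channel=chN`); adjust the regular expressions if the generated scripts look different:

```r
library(stringr)

# Generated pc job script for this plate (file name taken from the output above).
job_script <- paste0(
  "/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/",
  "quick_group_jobs_bash_pc_2019-02-27.sh")

lines  <- readLines(job_script)
out_ch <- str_match(lines, "-o \\S*-(ch\\d)")[, 2]           # channel in the output path
grp_ch <- str_match(lines, "Metadata_channel=(ch\\d)")[, 2]  # channel in the grouping

# Indices of lines where the output directory and the grouping disagree.
which(!is.na(out_ch) & !is.na(grp_ch) & out_ch != grp_ch)
```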
Moreover, I see that only timepoint 1 is processed. I do not know why.
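To see whether the metadata itself only lists one timepoint, or whether later timepoints are dropped downstream, a quick tabulation of `Metadata_timepoint` in one of the per-well metadata files; a sketch, with the file name mirroring the one used in the manual call above:

```r
library(tidyverse)

meta <- read_csv(paste0(
  "/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/",
  "metadata_000012049003__2019-01-28T21_02_07-Measurement_2_pc_A01.csv"))

# If only sk1 shows up here, the grouping metadata is the problem;
# if all timepoints are present, the job generation or pipeline drops them.
meta %>% count(Metadata_timepoint)
```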
In fact, the hyphen vs. underscore issue does not have an impact.
```bash
cellprofiler -c -r -p /home/ubuntu/bucket/rapid/cellprofiler/pipelines/dig_pc_segment_expand_root_measure.cppipe -i /home/ubuntu/bucket/blank -o /home/ubuntu/local_output/000012049003__2019-01-28T21_02_07-Measurement_2-sk1-A05-f01-ch1 -d /home/ubuntu/local_output/000012049003__2019-01-28T21_02_07-Measurement_2-sk1-A05-f01-ch1/cp.is.done --data-file=/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//metadata_000012049003__2019-01-28T21_02_07-Measurement_2_pc_A05.csv -g Metadata_parent=000012049003__2019-01-28T21_02_07-Measurement_2,Metadata_timepoint=sk1,Metadata_well=A05,Metadata_fld=f01,Metadata_channel=ch1 --log-level=10
```
Now I resubmit the pc segmentation pipeline. I found a bug that caused the data to be written to the wrong directory. I am moving files manually again.
```bash
aws s3 mv --dryrun --recursive s3://ascstore//home/ubuntu/bucket/flatfield/000012049003__2019-01-28T21_02_07-Measurement_2/ s3://ascstore/flatfield/000012049003__2019-01-28T21_02_07-Measurement_2/
```
The job finished. The data for plate 2 is collected. Now I move on to measurements 1 and 3.
I basically rerun the whole thing for the other two plates:
```r
library(tidyverse)
library(dcphelper)

new_path_base <- c("/home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/",
                   "/home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/")
inbox_path_base <- c("/home/ubuntu/bucket/inbox/000012049003__2019-01-29T19_11_28-Measurement_3/Images/",
                     "/home/ubuntu/bucket/inbox/000012049003__2019-01-31T18_40_09-Measurement_1/Images/")

new_json_path_flat = "/home/ubuntu/bucket/metadata/job_flatfield_template.json"
new_json_path_max  = "/home/ubuntu/bucket/metadata/job_maxproj_template.json"
new_json_path_seg  = "/home/ubuntu/bucket/metadata/job_segmentation_template.json"

## Creating target dirs
lapply(new_path_base, dir.create) # Do not execute this from a local machine if you expect other AWS services to access the directory later on

## Name channels
channel_v <- c("ch1", "ch2", "ch3", "ch4")
channel_n <- c("pc", "bf", "ce", "tm")
```
```r
for(j in 1:length(inbox_path_base)){
  metadata_split_path <- create_flatfield_metadata_split(
    path = inbox_path_base[j],
    channel_of_interest = channel_v[1], # phase contrast (ch1)
    name = "pc",
    json_path = new_json_path, # not needed
    path_base = new_path_base[j],
    force = FALSE,
    include_brightfield_proj = TRUE,
    include_additional_proj = TRUE)
}

for(i in 3:length(channel_n)){
  for(j in 1:length(inbox_path_base)){
    metadata_split_path <- create_flatfield_metadata_split(
      path = inbox_path_base[j],
      channel_of_interest = channel_v[i], # ce / tm
      name = channel_n[i],
      json_path = new_json_path, # not needed
      path_base = new_path_base[j],
      force = FALSE)
  }
}
```
```bash
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc"
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf"
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "ce"
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "tm"
python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc"
python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf"
python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "ce"
python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "tm"
```
I did all of this using the new generator scripts. They are simple executable R scripts, covering all of the code above.
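A minimal sketch of what such an executable generator script might look like; the script name and argument handling are hypothetical, the body just wraps the calls shown above, and the `json_path` argument marked "not needed" earlier is omitted:

```r
#!/usr/bin/env Rscript
# Hypothetical generator script: takes an inbox Images/ directory and a target
# metadata directory, then writes the per-channel metadata splits as above.
args <- commandArgs(trailingOnly = TRUE)
inbox_path <- args[1]  # e.g. /home/ubuntu/bucket/inbox/<plate>/Images/
path_base  <- args[2]  # e.g. /home/ubuntu/bucket/metadata/<plate>/

library(dcphelper)

dir.create(path_base, showWarnings = FALSE)

channel_v <- c("ch1", "ch2", "ch3", "ch4")
channel_n <- c("pc",  "bf",  "ce",  "tm")

# Phase contrast (ch1), including brightfield and additional projections.
create_flatfield_metadata_split(
  path = inbox_path,
  channel_of_interest = channel_v[1],
  name = channel_n[1],
  path_base = path_base,
  force = FALSE,
  include_brightfield_proj = TRUE,
  include_additional_proj = TRUE)

# Remaining channels (ce, tm).
for (i in 3:4) {
  create_flatfield_metadata_split(
    path = inbox_path,
    channel_of_interest = channel_v[i],
    name = channel_n[i],
    path_base = path_base,
    force = FALSE)
}
```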
I manually created a master file to batch up the grouped DCP calls. This currently has to be done by hand and is not stored.
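As a hedged sketch of how that master file could be assembled programmatically instead, assuming it is just the concatenation of the generated quick_group_jobs_bash_*.sh scripts (the output name master_jobs.sh is hypothetical):

```r
# Build one master file per plate by concatenating the generated job scripts.
job_dirs <- c("/home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/",
              "/home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/")

for (job_dir in job_dirs) {
  scripts <- list.files(job_dir, pattern = "^quick_group_jobs_bash_.*\\.sh$",
                        full.names = TRUE)
  master  <- unlist(lapply(scripts, readLines))
  writeLines(master, file.path(job_dir, "master_jobs.sh"))
}
```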
I encountered errors in the segmentation pipeline. For now I send everything off for processing but comment out the DCP run.