knitr::opts_chunk$set(echo = TRUE)

Brightfield Flatfield projection

Naming target dir

library(tidyverse)
library(dcphelper)

new_path_base = c("/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/")

inbox_path_base = c("/home/ubuntu/bucket/inbox/000012049003__2019-01-28T21_02_07-Measurement_2/Images/")

new_json_path_flat = "/home/ubuntu/bucket/metadata/job_flatfield_template.json"
new_json_path_max = "/home/ubuntu/bucket/metadata/job_maxproj_template.json"
new_json_path_seg = "/home/ubuntu/bucket/metadata/job_segmentation_template.json"

## Creating target dir 
dir.create(new_path_base) # Do not execute this from a local machine if you expect other AWS services to access the directory later on

## Name channels
channel_v <- c("ch1", "ch2", "ch3", "ch4")
channel_n <- c("pc", "bf", "ce", "tm")

Defining metadata

for(j in 1:length(inbox_path_base)){
  metadata_split_path <- create_flatfield_metadata_split(
    path = inbox_path_base[j],
    channel_of_interest = channel_v[1], # ch1 = phase contrast ("pc"); the brightfield projection is added via the flag below
    name = "pc",
    json_path = new_json_path_flat, # not needed for this call; any of the templates above works
    path_base = new_path_base,
    force = FALSE,
    include_brightfield_proj = TRUE,
    include_additional_proj = TRUE)
}


for(i in 3:length(channel_n)){
  for(j in 1:length(inbox_path_base)){
  metadata_split_path <- create_flatfield_metadata_split(
    path = inbox_path_base[j],
    channel_of_interest = channel_v[i], # ch3/ch4 = "ce"/"tm"
    name = channel_n[i],
    json_path = new_json_path_flat, # not needed for this call; any of the templates above works
    path_base = new_path_base,
    force = FALSE)
  }
}
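
As a quick sanity check (a sketch, not part of the original run), the split files that were just written can be listed and the first one inspected:

```r
# Sanity check (sketch): list the split metadata files that were just written
# and peek at the first one; roughly one file per channel name and well is expected.
split_files <- list.files(new_path_base, pattern = "metadata_.*\\.csv$", full.names = TRUE)
length(split_files)
readr::read_csv(split_files[1]) %>% # columns should include the Metadata_* fields used below
  dplyr::glimpse()
```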

Grouping metadata

```{bash, eval = TRUE}

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc"

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf"

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "ce"

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "tm"
```
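
As an optional cross-check from R (a sketch, not part of the original run), the groups these calls should produce for the "pc" files can be previewed by counting over the same Metadata_ columns:

```r
# Optional cross-check (sketch): count rows over the same Metadata_* columns
# that ManualMetadata_dir.py groups by, here for the "pc" split files.
list.files(new_path_base, pattern = "metadata_.*pc.*\\.csv$", full.names = TRUE) %>%
  purrr::map_dfr(readr::read_csv) %>%
  dplyr::count(Metadata_parent, Metadata_timepoint, Metadata_well, Metadata_fld, Metadata_channel) %>%
  head()
```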

The following measurement directories still need renaming before these grouping calls can be run:

```bash
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012048903__2019-02-05T20_27_41-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc"

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012048903__2019-02-05T20_27_41-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf"

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012048903__2019-02-05T20_27_41-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "ce"

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012048903__2019-02-05T20_27_41-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "tm"

python ~/bucket/metadata/ManualMetadata_dir.py #/home/ubuntu/bucket/metadata/000012048903__2019-02-07T20_15_54-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc"

python ~/bucket/metadata/ManualMetadata_dir.py #/home/ubuntu/bucket/metadata/000012048903__2019-02-07T20_15_54-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf"

python ~/bucket/metadata/ManualMetadata_dir.py #/home/ubuntu/bucket/metadata/000012048903__2019-02-07T20_15_54-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "ce"

python ~/bucket/metadata/ManualMetadata_dir.py #/home/ubuntu/bucket/metadata/000012048903__2019-02-07T20_15_54-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "tm"
```

I also have to re-run parts of the grouping:

```bash
python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf_O"

python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf_P"

python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc_O"

python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc_P"
```

Writing job files

for(j in new_path_base){
link_json_metadata(metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
                     stringr::str_subset(pattern = ".csv") %>%
                     stringr::str_subset(pattern = "pc"), 
                   json_path = new_json_path_seg, 
                   path_base = j)
}


for(j in new_path_base){
link_json_metadata(metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
                     stringr::str_subset(pattern = ".csv") %>%
                     stringr::str_subset(pattern = "bf"), 
                   json_path = new_json_path_flat, 
                   path_base = j)
}


channel_n_mod <- channel_n[3:4]
for(j in new_path_base){
for(i in 1:length(channel_n_mod)){
link_json_metadata(metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
                     stringr::str_subset(pattern = ".csv") %>%
                     stringr::str_subset(pattern = channel_n_mod[i]), 
                   json_path = new_json_path_max, 
                   path_base = j)
}
}
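
As a rough completeness check, the number of metadata CSVs and job files per directory could be compared (a sketch; the job-file pattern is an assumption about what link_json_metadata() writes and may need adjusting):

```r
# Rough completeness check (sketch). The "job.*\\.json$" pattern is an
# assumption about the files link_json_metadata() writes.
for (j in new_path_base) {
  n_meta <- length(list.files(j, pattern = "metadata_.*\\.csv$"))
  n_jobs <- length(list.files(j, pattern = "job.*\\.json$"))
  message(j, ": ", n_meta, " metadata CSVs, ", n_jobs, " job JSON files")
}
```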

I am encountering an error with the phase contrast grouping at position O10. I am rerunning these manually.

for(j in new_path_base){
tmp <- link_json_metadata(metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
                     stringr::str_subset(pattern = ".csv") %>%
                     stringr::str_subset(pattern = "pc_O1"), 
                   json_path = new_json_path_seg, 
                   path_base = j)
}

for(j in new_path_base){
tmp <- link_json_metadata(metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
                     stringr::str_subset(pattern = ".csv") %>%
                     stringr::str_subset(pattern = "pc_O2"), 
                   json_path = new_json_path_seg, 
                   path_base = j)
}

for(j in new_path_base){
tmp <- link_json_metadata(metadata_split_path = list.files(j, pattern = "metadata_", full.names = TRUE) %>%
                     stringr::str_subset(pattern = ".csv") %>%
                     stringr::str_subset(pattern = "pc_P"), 
                   json_path = new_json_path_seg, 
                   path_base = j)
}

Grouping job files and creating an executable

for(j in new_path_base){
for(i in 1:length(channel_n)){
group_jobs_bash(path_base = j,
                name = channel_n[i],
                letter_row_interval = c(1:16),
                number_col_interval = c(1:24))
}
}

I generate the following whole-plate job files.

[1] "Wrote: /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//quick_group_jobs_bash_pc_2019-02-27.sh"
[1] "Wrote: /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//quick_group_jobs_bash_bf_2019-02-27.sh"
[1] "Wrote: /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//quick_group_jobs_bash_ce_2019-02-27.sh"
[1] "Wrote: /home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//quick_group_jobs_bash_tm_2019-02-27.sh"

Running jobs

I bump the fleet size up to 50 instances; this is a lot of data to process.

First I run the brightfield and max-projection jobs, then the phase-contrast-based segmentation.

It is important to run all the jobs first, before renaming files for deep learning. None of the steps below have been adjusted to the plate set at hand yet.

Because of errors in the job files, the pipeline results were all dumped into the wrong flatfield directory. I am now manually transferring files.

aws s3 mv --recursive --exclude "*" --include "000012049003__2019-01-28T21_02_07-Measurement_2*" s3://ascstore/flatfield/000012070903_2019-01-10T20_04_27-Measurement_3/ s3://ascstore/flatfield/000012049003__2019-01-28T21_02_07-Measurement_2/

I keep getting errors because of an underscore vs. hyphen issue.

The pipeline first did not find the files because it was looking for "-"-delimited headers in the data, so I changed the metadata to use underscores. Now I get a different error.

For now, I think I will revert the metadata and test a single directory manually.
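
Before reverting anything globally, a quick look at a single split file shows which delimiters the grouping columns actually contain (a diagnostic sketch; the file below is the one used in the CellProfiler call further down):

```r
# Diagnostic sketch: inspect one split metadata file to see whether the
# grouping columns contain hyphens or underscores where expected.
meta <- readr::read_csv(file.path(
  new_path_base,
  "metadata_000012049003__2019-01-28T21_02_07-Measurement_2_pc_A01.csv"
))
names(meta)
meta %>%
  dplyr::select(dplyr::starts_with("Metadata_")) %>%
  dplyr::distinct() %>%
  head()
```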

I call CellProfiler locally:

cellprofiler -c -r -p /home/ubuntu/bucket/rapid/cellprofiler/pipelines/dig_pc_segment_expand_root_measure.cppipe -i /home/ubuntu/bucket/blank -o /home/ubuntu/local_output/000012049003__2019-01-28T21_02_07-Measurement_2-sk1-A01-f01-ch1 -d /home/ubuntu/local_output/000012049003__2019-01-28T21_02_07-Measurement_2-sk1-A01-f01-ch1/cp.is.done --data-file=/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//metadata_000012049003__2019-01-28T21_02_07-Measurement_2_pc_A01.csv -g Metadata_parent=000012049003__2019-01-28T21_02_07-Measurement_2,Metadata_timepoint=sk1,Metadata_well=A01,Metadata_fld=f01,Metadata_channel=ch1 --log-level=10

I still get the same error.

I call the pipeline on another directory of images:

cellprofiler -c -r -p /home/ubuntu/bucket/rapid/cellprofiler/pipelines/dig_pc_segment_expand_root_measure.cppipe -i /home/ubuntu/bucket/blank -o /home/ubuntu/local_output/000012070903_2019-01-10T20_04_27-Measurement_3-sk1-A01-f01-ch1 -d /home/ubuntu/local_output/000012070903_2019-01-10T20_04_27-Measurement_3-sk1-A01-f01-ch1/cp.is.done --data-file=/home/ubuntu/bucket/metadata/000012070903_2019-01-10T20_04_27-Measurement_3//metadata_000012070903_2019-01-10T20_04_27-Measurement_3_pc_A01.csv -g Metadata_parent=000012070903_2019-01-10T20_04_27-Measurement_3,Metadata_timepoint=sk1,Metadata_well=A01,Metadata_fld=f01,Metadata_channel=ch1 --log-level=10

I get the same error - the pipeline seems to be corrupt!

It is not the pipeline but the job file that is broken: for some reason the path for the ch1 measurement is written into a ch3 directory, and that breaks the pipeline.

Moreover, I see that only timepoint 1 is processed; I do not know why.
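
To narrow down both problems, the split metadata could be summarised by channel and timepoint (a diagnostic sketch using the Metadata_ columns defined earlier):

```r
# Diagnostic sketch: which channels and timepoints do the split metadata files
# actually cover? Helps to spot missing timepoints or mixed-up channels.
list.files(new_path_base, pattern = "metadata_.*\\.csv$", full.names = TRUE) %>%
  purrr::map_dfr(readr::read_csv) %>%
  dplyr::count(Metadata_channel, Metadata_timepoint) %>%
  dplyr::arrange(Metadata_channel, Metadata_timepoint)
```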

In fact, the hyphen vs. underscore issue has no impact.

cellprofiler -c -r -p /home/ubuntu/bucket/rapid/cellprofiler/pipelines/dig_pc_segment_expand_root_measure.cppipe -i /home/ubuntu/bucket/blank -o /home/ubuntu/local_output/000012049003__2019-01-28T21_02_07-Measurement_2-sk1-A05-f01-ch1 -d /home/ubuntu/local_output/000012049003__2019-01-28T21_02_07-Measurement_2-sk1-A05-f01-ch1/cp.is.done --data-file=/home/ubuntu/bucket/metadata/000012049003__2019-01-28T21_02_07-Measurement_2//metadata_000012049003__2019-01-28T21_02_07-Measurement_2_pc_A05.csv -g Metadata_parent=000012049003__2019-01-28T21_02_07-Measurement_2,Metadata_timepoint=sk1,Metadata_well=A05,Metadata_fld=f01,Metadata_channel=ch1 --log-level=10

Now I resubmit the pc segmentation pipeline. I found a bug that caused the data to be written to the wrong directory, so I am moving files manually again.

aws s3 mv --dryrun --recursive s3://ascstore//home/ubuntu/bucket/flatfield/000012049003__2019-01-28T21_02_07-Measurement_2/ s3://ascstore/flatfield/000012049003__2019-01-28T21_02_07-Measurement_2/

The job finished. The data for plate 2 is collected. Now moving on to measurements 1 and 3.

I basically rerun the whole thing for the other two plates:

Brightfield Flatfield projection

Naming target dir

library(tidyverse)
library(dcphelper)

new_path_base = c("/home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/",
                  "/home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/")

inbox_path_base= c("/home/ubuntu/bucket/inbox/000012049003__2019-01-29T19_11_28-Measurement_3/Images/",
                   "/home/ubuntu/bucket/inbox/000012049003__2019-01-31T18_40_09-Measurement_1/Images/")

new_json_path_flat = "/home/ubuntu/bucket/metadata/job_flatfield_template.json"
new_json_path_max = "/home/ubuntu/bucket/metadata/job_maxproj_template.json"
new_json_path_seg = "/home/ubuntu/bucket/metadata/job_segmentation_template.json"

## Creating target dir 
lapply(new_path_base, dir.create) # Do not execute this from a local machine if you expect other AWS services to access the directory later on

## Name channels
channel_v <- c("ch1", "ch2", "ch3", "ch4")
channel_n <- c("pc", "bf", "ce", "tm")

Defining metadata

for(j in 1:length(inbox_path_base)){
  metadata_split_path <- create_flatfield_metadata_split(
    path = inbox_path_base[j],
    channel_of_interest = channel_v[1], # ch1 = phase contrast ("pc"); the brightfield projection is added via the flag below
    name = "pc",
    json_path = new_json_path_flat, # not needed for this call; any of the templates above works
    path_base = new_path_base[j],
    force = FALSE,
    include_brightfield_proj = TRUE,
    include_additional_proj = TRUE)
}


for(i in 3:length(channel_n)){
  for(j in 1:length(inbox_path_base)){
  metadata_split_path <- create_flatfield_metadata_split(
    path = inbox_path_base[j],
    channel_of_interest = channel_v[i], # ch3/ch4 = "ce"/"tm"
    name = channel_n[i],
    json_path = new_json_path_flat, # not needed for this call; any of the templates above works
    path_base = new_path_base[j],
    force = FALSE)
  }
}

Grouping metadata

```bash
python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc"

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf"

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "ce"

python /home/ubuntu/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-29T19_11_28-Measurement_3/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "tm"


python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "pc"

python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "bf"

python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "ce"

python ~/bucket/metadata/ManualMetadata_dir.py /home/ubuntu/bucket/metadata/000012049003__2019-01-31T18_40_09-Measurement_1/ "['Metadata_parent','Metadata_timepoint', 'Metadata_well', 'Metadata_fld', 'Metadata_channel']" "tm"
```

I did all of this using the new generator scripts. They are simple executable R scripts, covering all of the code above.

I manually created a master file to batch up the grouped DCP calls. Right now this has to be done by hand and the file is not stored.
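
For reference, one way to assemble such a master file would be to concatenate the per-plate job scripts (a sketch of the manual step; the output name masterfile.sh is a placeholder):

```r
# Sketch of the manual master-file step: concatenate the per-plate
# quick_group_jobs_bash_*.sh scripts into one file. "masterfile.sh" is a
# placeholder output path.
scripts <- unlist(lapply(new_path_base, function(j) {
  list.files(j, pattern = "^quick_group_jobs_bash_.*\\.sh$", full.names = TRUE)
}))
writeLines(unlist(lapply(scripts, readLines)), "masterfile.sh")
```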

I encountered errors in the segmentation pipeline. For now I send off everything for processing but comment out the DCP run.


