knitr::opts_chunk$set(echo = F, cache = T, cache.lazy = F, dpi = 300)
library(EBImage)
library(ShoeScrubR)
library(tidyverse)

set.seed(3142095)
lss_dir <- "/lss/research/csafe-shoeprints/ShoeImagingPermanent"

# For a bunch of images...
full_imglist <- list.files("/lss/research/csafe-shoeprints/ShoeImagingPermanent/",
                           pattern = "0[01]\\d{4}[RL]_\\d{8}_5_1_1_.*_.*_.*", full.names = T)
dir <- "/tmp/film-prints"
if (!dir.exists(dir)) dir.create(dir)

file.copy(full_imglist, file.path(dir, basename(full_imglist)))
imglist <- file.path(dir, basename(full_imglist))

shoe_info <- read_csv("~/Projects/CSAFE/2018_Longitudinal_Shoe_Project/Clean_Data/shoe-info.csv") %>%
  filter(ShoeID %in% str_sub(basename(imglist), 1, 3)) %>%
  select(ShoeID, Brand, Size) %>%
  mutate(Size = str_remove(Size, "[ MW]") %>% parse_number()) %>%
  crossing(tibble(Mask_foot = c("R", "L"), Shoe_foot = c("L", "R"))) %>%
  mutate(mask = purrr::pmap(list(Brand, Size, Mask_foot, ppi = 300), shoe_mask))

scan_info <- tibble(
  file = imglist,
  ShoeID = str_extract(basename(file), "^\\d{3}"),
  Shoe_foot = str_extract(basename(file), "\\d{6}[RL]") %>% str_remove_all("\\d"),
  date = str_extract(basename(file), "\\d{8}") %>% parse_date(format = "%Y%m%d")
) %>%
  left_join(select(shoe_info, ShoeID, Brand, Size, Shoe_foot)) %>%
  group_by(Shoe_foot, Brand) %>%
  sample_n(2) %>%
  ungroup() %>%
  group_by(ShoeID, Shoe_foot) %>%
  # arrange(desc(date)) %>%
  # filter(row_number() == 1) %>%
  ungroup() %>%
  mutate(
    img = purrr::map(file, EBImage::readImage, all = F),
    img = purrr::map(img, EBImage::channel, "luminance"),
    im_dim = purrr::map(img, dim)
  )

Parameters in the Rough Alignment algorithm

Image Exaggeration

  1. Gaussian blur
    • kernel size
    • kernel shape
    • sigma
  2. Threshold for binarization
  3. Opening diameter
  4. Closing diameter

Selection of Binarization Threshold

par(mfrow = c(1, 2))
plot(scan_info$img[[1]])
hist(scan_info$img[[1]])
em_thresh <- function(img, scale = 10, ngroups = 3) {
  imsmall <- img_resize(img, w = floor(dim(img)[1]/scale), h = floor(dim(img)[2]/scale))
  em <- mixtools::normalmixEM(imsmall, k = ngroups)
  values <- cbind(em$x, em$posterior)
  # values <- values[order(values[,1]),]
  # values[values[,2]*100 < values[,3], ]

  mean_idx <- order(em$mu, decreasing = F)

  # get probs for full img
  img_dens_functions <- lapply(mean_idx, function(x) 0*img + dnorm(img, mean = em$mu[x], sd = em$sigma[x]))

  img_dens_total <- abind::abind(img_dens_functions, along = 3) %>%
    apply(c(1, 2), sum)

  img_dens_ratio <- lapply(img_dens_functions, function(x) as.Image(x/(img_dens_total - x)))

  return(list(img_densities = img_dens_functions, img_ratios = img_dens_ratio, em = em))
}

emt_adidas <- em_thresh(scan_info$img[[1]])
emt <- em_thresh(scan_info$img[[2]])
Image(abind::abind(emt_adidas$img_ratios, along = 3)) %>% plot(all = T, nx = 3)
Image(abind::abind(emt$img_ratios, along = 3)) %>% plot(all = T, nx = 3)

The first image shows pixels(in white) identified as most likely providing signal data. The third image shows pixels identified as most likely belonging to the background. The second image shows intermediate pixels, which are not necessarily consistent with background or signal.

One primary advantage of using only the first image is that it is nearly discrete - the values are the likelihood ratio of belonging to signal vs. other (background + intermediate), and are almost completely separated between infinite and 0.

df <- tribble(~type, ~values,
              "adidas", as.numeric(emt_adidas$img_ratios[[1]]),
              "nike", as.numeric(emt$img_ratios[[1]])) %>%
  unnest(values)

df.inf <- df %>%
  group_by(type) %>%
  summarize(inf = sum(is.infinite(values)))

ggplot() + 
  geom_histogram(aes(x = values), data = df, binwidth = .1) + 
  scale_x_log10() + 
  geom_text(aes(x = 1, y = Inf, label = sprintf("%d infinite values", inf)), data = df.inf, hjust = 0, vjust = 1.2) + 
  facet_wrap(~type)

This allows us to get away from selecting a single threshold value in favor of computing an implicit automatic threshold via the EM algorithm.

Diameter selection

By run length

Idea - use rle to count runs in rows and columns to get an approximate idea of which parameters to use for tuning. E.g. using binarized image, runs of 0 will be either relatively short (adidas stripes, between Nike blobs) or relatively long (outside of the shoe).

bin_img <- normalize(emt$img_ratios[[1]])
bin_img[is.nan(bin_img)] <- 1

# bin_img[1000,] %>% as.numeric() %>% rle() %>% do.call("bind_cols", .) %>% filter(values == 1) %>% `[[`(1) %>% median()

col_runs <- apply(bin_img, 1, function(x) rle(x) %>% do.call("bind_cols", .)) %>%
  purrr::map2_df(., 1:length(.), ~dplyr::mutate(.x, idx = .y, type = "col", prop_lengths = lengths/dim(bin_img)[2]))

row_runs <- apply(bin_img, 2, function(x) rle(x) %>% do.call("bind_cols", .)) %>%
  purrr::map2_df(., 1:length(.), ~dplyr::mutate(.x, idx = .y, type = "row", prop_lengths = lengths/dim(bin_img)[1]))

runs <- bind_rows(col_runs, row_runs) %>%
  group_by(type) %>%
  mutate(pos = idx/max(idx)) %>%
  ungroup()

In this plot, there is a cluster of 0-runs of length 100(ish) and 250(ish), while the 1-runs are at most 60 pixels long, and usually closer to 10-20. The better question is how do we "see" that automatically?

ggplot(data = runs, aes(y = lengths, x = pos, fill = factor(values))) + geom_jitter(alpha = .05) + facet_wrap(~type + values, scales = 'free')

By blob size

# Clean up just enough to label well
bin_img_clean <- bin_img %>%
  erode(makeBrush(size = 1)) %>%
  dilate(matrix(c(1, 0, 1, 0, 1, 0, 1, 0, 1), byrow = T, nrow = 3)) %>%
  dilate(makeBrush(size = 3, shape = "line")) %>%
  # dilate(makeBrush(size = 3)) %>%
  erode(makeBrush(size = 3, shape = "disc")) %>%
  dilate(makeBrush(size = 9, shape = "disc"))

bin_img_label <- bwlabel(bin_img) %>%
  fillHull()
bin_img_clean_label <- bwlabel(bin_img_clean) %>%
  fillHull()

label_counts <- table(bin_img_label) %>% as.data.frame() %>% set_names(c("label", "freq")) 
tiny_labels <- label_counts %>% filter(freq <= 15)

clean_label_counts <- table(bin_img_clean_label) %>% as.data.frame() %>% set_names(c("label", "freq")) 
tiny_clean_labels <- clean_label_counts %>% filter(freq <= 2 * sum(makeBrush(size = 9, "disc")))

bin_big_clean_labels <- bin_img_clean_label
bin_big_clean_labels[bin_big_clean_labels %in% tiny_clean_labels$label] <- 0

bin_big_labels <- bin_img_label
bin_big_labels[bin_big_labels %in% tiny_labels$label] <- 0

par(mfrow = c(2, 3))
plot(bin_img)
colorLabels(bin_img_label) %>% plot()
colorLabels(bin_big_labels) %>% plot()
plot(bin_img_clean)
colorLabels(bin_img_clean_label) %>% plot()
colorLabels(bin_big_clean_labels) %>% plot()
bind_rows(mutate(label_counts, type = "binary image"),
          mutate(clean_label_counts, type = "cleaned binary image")) %>%
  filter(label != 0) %>%
  ggplot(aes(x = freq, colour = type)) + 
  stat_density(geom = "line") + 
  # coord_cartesian(xlim = c(1, 16000), ylim = c(0, .015)) + 
  scale_x_log10() + 
  xlab("Blob size in pixels")
par(mfrow = c(1, 6))
purrr::walk(c(100, 1000, 2500, 5000, 10000, 15000), function(z) {
  interesting_blobs <- clean_label_counts %>% filter(freq > z) 
  small_interesting_blobs_clean <- bin_img_clean_label * (bin_img_clean_label %in% as.numeric(as.character(interesting_blobs$label)))

  colorLabels(small_interesting_blobs_clean) %>% plot()
  text(x = dim(bin_img)[1]/2, y = dim(bin_img)[2] - 300, label = sprintf("Blobs bigger \n than %d", z), col = "white", adj = c(.5, .5))
})

Unfortunately, filtering by blobs may not be particularly useful when dealing with Adidas shoes.

For a bunch of images...

scan_info <- mutate(scan_info, em = purrr::map(img, em_thresh, ngroups = 3))

scan_info <- mutate(scan_info,
                    bin_img = purrr::map(em, ~{
                      z <- .$img_ratios[[1]]
                      z <- z > 10
                      as.Image(normalize(z))
                    }))

par(mfrow = c(2, 8))
purrr::walk(scan_info$img, plot)
purrr::walk(scan_info$bin_img, plot)

Filtering each image to include only points with a ratio of P(signal)/P(not signal) greater than 10 removes most of the noise in the images without scale-dependent parameters, but the resulting image is a bit fragmented spatially, because the EM algorithm is performed on point intensity only; it does not include any spatial context because including 2 additional dimensions vastly increases memory requirements and computational time.

Some additional cleaning may be useful, but ideally, would use parameters which are small enough relative to the image to not depend on image scaling within reason (e.g. an image which has dimensions 1.5x the length/width of a similar image).

From that point, we can label the image, so that each self contained region has a different numeric label. We can use these regions to facilitate certain additional assumptions:

easy_clean <- function(img) {
  img %>%
  erode(makeBrush(size = 1)) %>%
  # dilate(matrix(c(1, 0, 1, 0, 1, 0, 1, 0, 1), byrow = T, nrow = 3)) %>%
  dilate(makeBrush(size = 3, shape = "line")) %>%
  # dilate(makeBrush(size = 3)) %>%
  erode(makeBrush(size = 5, shape = "disc")) %>%
  dilate(makeBrush(size = 9, shape = "disc"))
}

clean_img_corners <- function(labeled_img, len = 50) {
  tmp <- bind_rows(
    mutate(as.data.frame(table(labeled_img[1:len, 1:len]), stringsAsFactors = F), corner = "1"),
    mutate(as.data.frame(table(labeled_img[1:len, (ncol(labeled_img) - len):ncol(labeled_img)]), stringsAsFactors = F), corner = "2"),
    mutate(as.data.frame(table(labeled_img[(nrow(labeled_img) - len):nrow(labeled_img), 1:len]), stringsAsFactors = F), corner = "3"),
    mutate(as.data.frame(table(labeled_img[(nrow(labeled_img) - len):nrow(labeled_img), (ncol(labeled_img) - len):ncol(labeled_img)]), stringsAsFactors = F), corner = "4")
  ) %>%
    set_names(c("label", "freq", "corner")) %>%
    mutate(label = as.numeric(as.character(label)))

  pct_label <- tibble(label = unique(tmp$label), pct = lapply(unique(tmp$label), function(x) mean(labeled_img == x)))

  pct_label <- filter(pct_label, pct <= .1)

  labeled_img[labeled_img %in% pct_label$label] <- 0
  labeled_img
}


scan_info <- scan_info %>%
  mutate(clean_img = purrr::map(bin_img, easy_clean),
         clean_img_hull = purrr::map(clean_img, fillHull),
         labeled_img = purrr::map(clean_img, bwlabel),
         labeled_img = purrr::map(labeled_img, clean_img_corners),
         labeled_fill = purrr::map(labeled_img, fillHull),
         label_counts = purrr::map(labeled_fill, ~table(.) %>% as.data.frame(stringsAsFactors = F) %>% set_names(c("label", "freq"))))

par(mfrow = c(1, 8)) 
purrr::walk(scan_info$labeled_fill, ~colorLabels(.) %>% plot())

The default parameters (erode 1px, dilate 3px in a line, erode 5px in a disc, dilate 9px in a disc) seem to work fairly well on both shoe models.

At this point, we can use the labeled image as a mask: if the label is nonzero, we take the value of the pixel in the original image.

scan_info <- mutate(scan_info, censored_img = map2(img, labeled_fill, ~{.x * (.y > 0) + (.y == 0)}))

purrr::walk(scan_info$censored_img, ~plot(normalize(.)))

In order to create an exaggerated mask of the shoe print to align with the default mask, we start by blurring the image using a gaussian filter. Rather than using predetermined parameters, we instead select the sigma of the gaussian distribution to be 1/50th of the size of the dimensions of a square image with the same area as our image. Thresholding the blurred image at the median provides a first pass exaggerated shoe mask.

scan_info <- mutate(scan_info, blurred_img = purrr::map2(censored_img, im_dim, ~gblur(.x, sigma = sqrt(prod(.y))/50, boundary = "replicate")))
par(mfrow = c(3, 8))
purrr::walk(scan_info$blurred_img, plot)
purrr::walk(scan_info$blurred_img, ~plot(. < median(.)))
purrr::walk(scan_info$blurred_img, ~plot(colorLabels(bwlabel(. < median(.)))))

Labeling the resulting image's distinct regions, we can select the region that is not the background with the largest area, and then fill any holes in that region. Finally, another image-area based operation can be conducted: shrinking the image by 1/5 of the sqrt(area) of our image, then enlarging the image by the same amount. This smoothes out the image boundary and also minimizes the effects of interactions with the page border.

scan_info <- mutate(scan_info, exag_mask = purrr::map(blurred_img, ~{
  z <- (. < median(.))
  z <- bwlabel(z)
  labels <- table(z[z!=0]) %>% sort(decreasing = T)
  fillHull(z == as.numeric(names(labels[1]))) %>%
    opening(makeBrush(size = floor(sqrt(length(.))/5), "disc"))
}))
par(mfrow = c(1, 8))
purrr::walk(scan_info$exag_mask, plot)

Image Aggregation

get_subimage <- function(img, row, col, width = 5, height = 5) {
  idx_row <- pmin(nrow(img), pmax(1, row:(row + height)))
  idx_col <- pmin(ncol(img), pmax(1, col:(col + width)))

  img[idx_col, idx_row]
}

get_coord_sample <- function(img, N = 50000) {
  tibble::tibble(row = sample(1:nrow(img), N, replace = T), col = sample(1:ncol(img), N, replace = T)) %>%
    mutate(subimg = purrr::map2(row, col, ~get_subimg(img = img, row = .x, col = .y)))
}

bin_img_sample <- get_coord_sample(bin_img)


srvanderplas/ShoeScrubR documentation built on Nov. 27, 2019, 2:09 p.m.