R/clf_check_test.R

Defines functions vpr_category_create vpr_autoid_create vpr_manual_classification

Documented in vpr_autoid_create vpr_category_create vpr_manual_classification

## manual classifier check

vpr_manual_classification <-
  function(day,
           hour,
           basepath,
           category_of_interest,
           gr = TRUE,
           scale = 'x300',
           opticalSetting = 'S2',
           img_bright = TRUE,
           threshold_score) {

    #' Function to check results of classification manually
    #'
    #'
    #' Displays each image in day hour specified,
    #' prompts user to confirm or deny classification.
    #' If classification is denied, asks for a reclassification
    #'  value based on available category
    #'
    #' @param day day of interest in autoid (3 chr)
    #' @param hour hour of interest in autoid (2 chr)
    #' @param basepath path to folder containing autoid files (e.g., 'extdata/COR2019002/autoid')
    #' @param category_of_interest list of category folders you wish you sort through
    #' @param gr logical indicating whether pop up graphic menus are used (user preference - defaults to TRUE)
    #' @param scale argument passed to \code{\link{image_scale}}, default = 'x300'
    #' @param opticalSetting specifies optical setting of VPR, defining image frame
    #'   size, current options are 'S0', 'S1', 'S2' (default), 'S3', see further
    #'   info in details
    #' @param img_bright logical value indicating whether or not to include a blown
    #'  out high brightness version of image (can be helpful for viewing dark field
    #'  fine appendages)
    #' @param threshold_score (optional) a numeric value defining the minimum confidence
    #'   value, under which automatic classifications will be passed through
    #'   manual reclassification. This argument should match the threshold
    #'   provided in `vpr_autoid_copy()`
    #'
    #' @details Optical Setting frame sizes: S0 = 7x7 mm, S1 = 14x14mm, S2 =
    #'   24x24mm, S3 = 48x48 mm. These settings define the conversion factor from
    #'   pixels to millimetres and calculate image size for classification
    #'   reference
    #'
    #'
    #' @section Development:
    #'   \itemize{
    #'       \item Add "undo" functionality to go back on a typing mistake
    #'       \item  Fix scaling/ size issue so images are consistently sized
    #'       }
    #'
    #'@export

   # Initialize settings for converting pixels to length for different VPR optical settings

    if (opticalSetting == 'S0') {
      # px to mm conversion factor
      frame_mm <- 7
      mm_px <-
        frame_mm / 1024 # 1024 is resolution of VPR images (p.4 DAVPR manual)
      pxtomm <- 1 / mm_px
    }

    if (opticalSetting == 'S1') {
      # px to mm conversion factor
      frame_mm <- 14
      mm_px <-
        frame_mm / 1024 # 1024 is resolution of VPR images (p.4 DAVPR manual)
      pxtomm <- 1 / mm_px
    }

    if (opticalSetting == 'S2') {
      # px to mm conversion factor
      frame_mm <- 24
      mm_px <-
        frame_mm / 1024 # 1024 is resolution of VPR images (p.4 DAVPR manual)
      pxtomm <- 1 / mm_px
    }

    if (opticalSetting == 'S3') {
      # px to mm conversion factor
      frame_mm <- 42 # correct conversion factor (7/11/2022)
      mm_px <-
        frame_mm / 1024 # 1024 is resolution of VPR images (p.4 DAVPR manual)
      pxtomm <- 1 / mm_px
    }

    #Start image classification process
    day_hour <- paste0('d', day, '.h', hour)
    dirpath <- file.path("manual_reclassification_record",day_hour)
    dir.create(path = dirpath, showWarnings = FALSE, recursive = TRUE)
    existingFiles <- list.files(dirpath, full.names = TRUE)

    ans <-
      menu(
        c('Yes', 'No'),
        graphics = FALSE,
        title = paste(
          'WARNING!!! ALL EXISTING FILES IN', day_hour,
          'ARE ABOUT TO BE DELETED. DO YOU WISH TO PROCEED?'
        )
      )

    if (ans == 1) {

      file.remove(existingFiles)

    } else {

      warning(immediate. = TRUE,
              paste('CAUTION, FILES FOR', day_hour, 'ARE BEING APPENDED!!'))
    }

    categoryFolders_og <- list.files(basepath, full.names = TRUE)
    categoryNames <- list.files(basepath)
    allcategory <- list.files(basepath)

    categoryFolders <-categoryFolders_og[categoryNames %in% category_of_interest]
    categoryNames <- categoryNames[categoryNames %in% category_of_interest]

    if (length(categoryFolders) == 0) {

      stop('No category folders match category of interest!')

    }

    t_f <- dir.exists(categoryFolders)

    # Make an empty list for re-classifications with named elements for each category

    reclassified <- vector("list", length(allcategory))
    names(reclassified) <- allcategory

    #START FOR LOOP
    for (i in seq_len(length(categoryFolders))) {

      misclassified <- vector()

      print(paste('CATEGORY START : ', categoryFolders[i]))

      y <- readline(paste('CONFIRM NEW CATEGORY : ', categoryFolders[i]))

      if (t_f[i] == FALSE) {

        print(paste('category : ', categoryFolders[i], 'DOES NOT EXIST!'))
        SKIP = TRUE

      } else {

        dayHrFolders <- list.files(categoryFolders[i], full.names = TRUE)
        dayHrFolder <- grep(dayHrFolders, pattern = day_hour, value = TRUE)

        if (length(dayHrFolder) == 0) {

          print(paste('category : ', categoryFolders[i], 'DOES NOT EXIST IN ', day_hour, '!'))
          SKIP = TRUE

        } else {

          SKIP = FALSE

          aidFolder <- grep(dayHrFolders, pattern = 'aid$', value = TRUE)
          aidFile <- list.files(aidFolder, pattern = day_hour, full.names = TRUE)
          aid_dat <- read.table(aidFile, stringsAsFactors = FALSE) # TODO read in pred_results instead of aid

          if(missing(threshold_score)){

            # aid file info
            aid_dat <- unique(aid_dat$V1) # KS added unique to duplicate bug fix

          } else {

            aid_dat_t <- aid_dat %>%
              dplyr::filter(., V2 < threshold_score) %>%
              dplyr::arrange(., V1)

            aid_scr_t <- aid_dat_t$V2

          }

          #Obtain ROI paths from the category, day, and hour of interest (from copied images)
          rois <- sort(list.files(dayHrFolder, full.names = TRUE))

          for (ii in seq_len(length(rois))) {
            print(paste(ii, '/', length(rois)))

            if(missing(threshold_score)){

              scr_tmp <- 9999

            } else {

              scr_tmp <- aid_scr_t[ii] # check to make sure ROI image matches score displayed

              if(unlist(vpr_roi(aid_dat_t$V1[ii])) != unlist(vpr_roi(rois[ii]))){

                stop('Mismatch between CNN score and copied ROI, verify autoid and ROI inputs!')

              }

            }

              img_tmp <- magick::image_read(rois[ii])
              imgdat <- magick::image_info(img_tmp)

              img <- magick::image_read(rois[ii], strip = FALSE) %>%
                magick::image_scale(scale) %>%
                magick::image_annotate(paste(categoryNames[i], "(roi.", unlist(vpr_roi(rois[ii])), ")"), color = "red", size = 12) %>%
                magick::image_annotate(text = paste("scoreCNN = ", round(scr_tmp, digits = 2)),
                                       location = "+0+20",
                                       color = "red") %>%
                magick::image_annotate(text = paste(round(imgdat$width/pxtomm, digits = 2), "x", round(imgdat$height/pxtomm, digits = 2), "mm"),
                                       location = "+0+10",
                                       color = "red")

            if (img_bright == TRUE) {
              img_n <- magick::image_modulate(img, brightness = 500)

              img_f <- magick::image_append(c(img, img_n))

              print(img_f)

            } else {

              print(img)

            }

            #Pop up menu - perform classification
            ans <-
              menu(
                choices = c('Yes', 'No'),
                graphics = gr,
                title = paste(
                  "Is the classification, ",
                  categoryNames[i],
                  ", accurate? (y/n)"
                )
              )

            if (ans == 1) {

            } else {

              if(missing(threshold_score)){

                misclassified <- c(misclassified, aid_dat[[ii]])

              } else {

                misclassified <- c(misclassified,
                                   paste(aid_dat_t[ii,], collapse = " ")) # if threshold has been applied
                # TODO test that this is pulling correctly

              }

              # update to create generic category options
              # EC 2019 October 30
              ans <- menu(c(allcategory),
                          graphics = gr,
                          title = "Appropriate Category Classification?")

              if(missing(threshold_score)){

                reclassified[[ans]] <- c(reclassified[[ans]], aid_dat[[ii]])

              } else {

                # if threshold has been applied
                reclassified[[ans]] <- c(reclassified[[ans]],
                                         paste(aid_dat_t[ii,], collapse = " "))

              }

            }

          }

          # Write information to file
          withr::with_output_sink(paste0(dirpath, '/misclassified_', categoryNames[i], '.txt'),
                                  append = TRUE,
                                  code = {
                                    cat(misclassified, sep = '\n')
                                  })

        }
      }


      if (SKIP == TRUE) {

        withr::with_output_sink(paste0(dirpath, '/misclassified_', categoryNames[i], '.txt'),
                                append = TRUE,
                                code = {
                                  cat('\n')
                                })
      }

    } #end of category for loop

    # Write reclassified files for each category
    for (i in seq_len(length(reclassified))) {
      category_id <- names(reclassified[i])
      recl_tmp <- reclassified[[i]]

      # Make a reclassify file only for category that need to be reclassified
      if (length(recl_tmp != 0)) {

        withr::with_output_sink(paste0(dirpath, '/reclassify_', category_id, '.txt'), append = TRUE, code = {
          cat(recl_tmp, sep = '\n')

        })

      }

    }

}

vpr_autoid_create <- function(reclassify, misclassified, basepath, day, hour, mea = TRUE, categories) {
  #' Modifies aid and aid mea files based on manual reclassification
  #' @author E. Chisholm
  #'
  #' @param reclassify list of reclassify files (output from vpr_manual_classification())
  #' @param misclassified list misclassify files (output from vpr_manual_classification())
  #' @param basepath path to folder containing autoid files (e.g., 'extdata/COR2019002/autoid')
  #' @param day day identifier for relevant aid & aidmeas files
  #' @param hour hour identifier for relevant aid & aidmeas files
  #' @param mea logical indicating whether or not there are accompanying measurement files to be created
  #' @param categories A list object with all the potential classification categories
  #'
  #' @examples
  #' \dontrun{
  #' basepath <- 'E:/autoID_EC_07032019/'
  #' day <- '289'
  #' hr <- '08'
  #' categories <-
  #' c("bad_image_blurry", "bad_image_malfunction", "bad_image_strobe", "Calanus", "chaetognaths",
  #' "ctenophores", "krill", "marine_snow", "Other", "small_copepod", "stick")
  #' day_hour_files <-  paste0('d', day, '.h', hr)
  #' misclassified <- list.files(day_hour_files, pattern = 'misclassified_', full.names = TRUE)
  #' reclassify <- list.files(day_hour_files, pattern = 'reclassify_', full.names = TRUE)
  #' vpr_autoid_create(reclassify, misclassified, basepath, categories)
  #' }
  #' @export

  . <- day <- hour <- NA
  # get day and hour values
  day_misclassified <- unique(vpr_day(misclassified))[[1]]
  hour_misclassified <- unique(vpr_hour(misclassified))[[1]]

  if(length(day_misclassified) != 1 | length(hour_misclassified) != 1) {
    stop("MULTIPLE DAYS OR HOURS AMONG MISCLASSIFIED FILES, PLEASE CORRECT.")
  }

  categoryNames <- list.files(basepath)
  categoryFolders <- sort(list.files(basepath, full.names = TRUE))
  misclassified <- sort(misclassified)

  # loop through misclassified folders
  for (i in seq_len(length(misclassified))) {

    if (length(categoryFolders) != length(misclassified)) {
      stop('NUMBER OF MISCLASSIFIED FILES MUST == NUMBER OF CATEGORIES IN BASEPATH')
    }

    # get category value and check
    category <- vpr_category(misclassified[i], categories = categoryNames)
    categoryFolder <- grep(categoryFolders[i], pattern = category, value = TRUE)

    categoryFolder_check <- gsub(pattern = basepath, replacement = "", categoryFolder)
    categoryFolder_check <- gsub(pattern = "/", replacement = "", categoryFolder_check)

    if (category != categoryFolder_check) {
      stop('CATEGORIES ARE BEING CONFUSED - INVESTIGATE')
    }

    if (!category %in% categoryNames) {
      stop(paste(category, "is not a valid category name. Please run vpr_category_create() to create proper file structure within basepath"))
    }
    # read in misclassified ROIs
    mis_roi <- readLines(misclassified[i])

    if(unique(mis_roi)[[1]] == "") {

      mis_roi <- as.character()

    }

    if (length(mis_roi) != 0) { # if there are ROIs that were misclassified
      day_hour <- unique(substr(sub(mis_roi, pattern = ".*d",
                                    replacement = "d"), 1, 8))
      day_hour <- gsub(pattern = "\\\\", replacement = ".",
                       x = day_hour)
      mis_roi_gen <- unlist(vpr_roi(mis_roi))
      mis_roi_df <- data.frame(mis_roi_gen, day_hour, category,
                               stringsAsFactors = FALSE)
      aidFolder <- list.files(categoryFolder, pattern = "^aid$",
                              full.names = TRUE)
      mis_roi_df <- mis_roi_df %>% dplyr::group_by(., day_hour)
      if (length(unique(mis_roi_df$day_hour)) > 1) {
        stop("MULTIPLE HOURS IN ONE FILE, PLEASE CORRECT.")
      }
    }else { # if there were no ROIs misclassified
      print(paste("Blank misclassified file found for",
                  category, "!"))
      day_n <- vpr_day(misclassified[i])
      hr_n <- vpr_hour(misclassified[i])
      day_hour <- paste(day_n, hr_n, sep = ".")
      aidFolder <- list.files(categoryFolder, pattern = "^aid$",
                              full.names = TRUE)
    }
    # read in original aid file
    aids <- list.files(aidFolder, full.names = TRUE)
    # Ensure backward compatibility with files that do not have .txt extension
    # EO 12/12/24
    aid_list_old_fn <- grep(aids, pattern = paste0(day_hour, "$|", day_hour, "\\.txt$"), value = TRUE)
    if (length(aid_list_old_fn) == 0) { # create a dummy file if no aid exists
      aid_list_old_fn <- paste0(aidFolder, "/dummy_aid.", day_hour, ".txt")  # Changed to .txt
      withr::with_output_sink(aid_list_old_fn, code = {
        cat("\n")
      })
      print(paste("DUMMY FILE CREATED FOR", category, " : ", aid_list_old_fn))
      DUMMY = TRUE
      aid_new <- NULL
    } else {
      aid_list_old <- readLines(aid_list_old_fn)
      aid_list_old <- unique(aid_list_old)
      aid_old_gen <- unlist(vpr_roi(aid_list_old))

      if (length(mis_roi) == 0) { # if nothing was misclassified, pull original aid through
        aid_new <- aid_list_old
      } else {
          # remove duplicates
        sub_mis_roi <- mis_roi_df %>% dplyr::filter(.,
                                                    day_hour == unique(mis_roi_df$day_hour))
        mm <- aid_old_gen %in% sub_mis_roi$mis_roi_gen # KS solution 2023/09/27 - fixed ROI duplication error
        ind <- grep(mm, pattern = "TRUE")
        aid_new <- aid_list_old[-ind] # remove misclassified ROIs
        cat(paste(">>>>", length(ind), "ROIs removed from",
                  category, "in", unique(day_hour), "\n>>>> File:",
                  aid_list_old_fn, "\n"))
      }
      DUMMY <- FALSE
    }
    # fix matching meas file
    if (mea == TRUE) {
      aidMeaFolder <- list.files(categoryFolder, pattern = "^aidmea$",
                                 full.names = TRUE)
      aidMeaFile <- list.files(aidMeaFolder, pattern = paste0("*", day_hour, ".txt"), full.names = TRUE)  # Changed to .txt EO 12/12/24
      if (length(aidMeaFile) == 0) {
        aidMeaFile <- paste0(aidMeaFolder, "/dummy_aid.mea.", day_hour, ".txt")  # Changed to .txt EO 12/12/24
        withr::with_output_sink(aidMeaFile, code = {
          cat("\n")
        })
        print(paste("DUMMY FILE CREATED FOR MEAS OF",
                    category, " : ", aidMeaFile))
        aidMea_new <- NULL
        DUMMY = TRUE
      }
      else {
        aidMea_old <- read.table(aidMeaFile)
        aidMea_old <- unique(aidMea_old)
        if (length(mis_roi) == 0) {
          aidMea_new <- aidMea_old
        }
        else {
          aidMea_new <- aidMea_old[-ind, ]
          cat(paste(">>>>", length(ind), "Measurements removed from",
                    category, "in", unique(day_hour), "\n>>>> File:",
                    aidMeaFile, "\n"))
        }
        DUMMY = FALSE
      }
    }

    # check for any ROIs to be reclassified
    reclassify_category <- grep(reclassify,
                            pattern = paste0('reclassify_',category, '.txt'), # FIX [EOG] - Should only pull exact matches to category, will be sensitive to file naming conventions CAUTION
                            value = TRUE)

    if (length(reclassify_category) == 0) { # nothing to be reclassified
      print(paste("No", category, "to be reclassified"))
      if (mea == TRUE) {
        aidMea_final <- aidMea_new
      }
      aid_final <- aid_new
      if (DUMMY == TRUE) {
        warning(print("No original data and no reclassified data, consider removing category."))
      }
    } else { # add ROIs which were reclassified to category
      recl_roi <- readLines(reclassify_category)
      day_hour_re <- paste(day, hour, sep = ".")
      recl_roi_gen <- unlist(vpr_roi(recl_roi))
      if (length(unique(day_hour_re)) > 1) {
        stop(paste(reclassify_category, "has more than one unique hour value!\n
                   Please double check file."))
      }
      recl_roi_df <- data.frame(recl_roi_gen, day_hour_re,
                                recl_roi, stringsAsFactors = FALSE)
      recl_roi_df <- recl_roi_df %>% dplyr::filter(., !duplicated(recl_roi_gen))
      aid_final <- c(aid_new, recl_roi_df$recl_roi)
      cat(paste(">>>>", length(recl_roi_df$recl_roi),
                "ROIs added to", category, "in", unique(day_hour),
                "\n"))
      aid_fn_list <- list()
      for (l in seq_len(length(categoryFolders))) {
        all_aids <- list.files(file.path(categoryFolders[[l]],
                                         "aid"), full.names = TRUE)
        aid_fn_list[[l]] <- grep(all_aids, pattern = day_hour,
                                 value = TRUE)
      }
      # fix matching meas file
      if (mea == TRUE) {
        aidm_fn_list <- list()
        for (l in seq_len(length(categoryFolders))) {
          all_aidms <- list.files(file.path(categoryFolders[[l]],
                                            "aidmea"), full.names = TRUE)
          aidm_fn_list[[l]] <- grep(all_aidms, pattern = day_hour,
                                    value = TRUE)
        }
        roimeas_dat_combine <- vpr_autoid_read(file_list_aid = unlist(aid_fn_list),
                                               file_list_aidmeas = unlist(aidm_fn_list), export = "aidmeas",
                                               station_of_interest = NA, warn = FALSE)
        recl_roi_num <- recl_roi_df$recl_roi_gen
        recl_roi_meas <- roimeas_dat_combine[roimeas_dat_combine$roi %in%
                                               recl_roi_num, ]
        if (length(recl_roi_meas$roi_ID) > length(recl_roi)) {
          print(paste("Warning, duplicate ROI detected! Removing automatically"))
          print(recl_roi_meas[duplicated(recl_roi_meas$roi_ID),
          ])
          recl_roi_meas <- recl_roi_meas %>% dplyr::filter(.,
                                                           !duplicated(recl_roi_meas$roi_ID))
        }
        col_names <- c("Perimeter", "Area",
                       "width1", "width2", "width3",
                       "short_axis_length", "long_axis_length")
        recl_roi_meas <- recl_roi_meas %>% dplyr::select(.,
                                                         col_names)
        aidMea_list <- list()
        for (iii in 1:7) {
          aidMea_list[[iii]] <- c(aidMea_new[, iii],
                                  unname(recl_roi_meas[, iii]))
        }
        aidMea_final <- data.frame(matrix(unlist(aidMea_list),
                                          ncol = length(aidMea_list)))
        cat(paste(">>>>", length(recl_roi_meas$Perimeter),
                  "Measurements added to", category, "in",
                  unique(day_hour), "\n"))
        if (length(recl_roi_meas$Perimeter) != length(recl_roi_df$recl_roi_gen)) {
          warning("Measurements and ROI numbers in reclassification do not match!!!")
        }
      }
    }
    # write new_autoid files
    dirpath <- file.path("new_autoid", category[[1]])
    dir.create(dirpath, showWarnings = FALSE, recursive = TRUE)
    if (mea == TRUE) {
      aidMea_final_nm <- paste0("new_aid.mea.", unique(day_hour), '.txt')
      aidMea_final_fn <- file.path(dirpath, "aidmea",
                                   aidMea_final_nm)
      dir.create(file.path(category, "aidmea"), showWarnings = FALSE,
                 recursive = TRUE)
      write.table(file = aidMea_final_fn, aidMea_final,
                  sep = "    ", quote = FALSE, col.names = FALSE,
                  row.names = FALSE)
      cat(paste(">>>> New aidmea file created for",
                category, "in", unique(day_hour), "\n"))
    }
    aid_final_nm <- paste0("new_aid.", unique(day_hour), '.txt')
    aid_final_fn <- file.path(dirpath, "aid", aid_final_nm)
    dir.create(file.path(dirpath, "aid"), showWarnings = FALSE,
               recursive = TRUE)

    #aid_final <- sub(" .*", "", aid_final) #####


    write.table(file = aid_final_fn, aid_final, quote = FALSE,
                col.names = FALSE, row.names = FALSE)
    cat(paste(">>>> New aid file created for",
              category, "in", unique(day_hour), "\n"))
    if (DUMMY == TRUE & mea == TRUE) {
      atf <- grep(aid_list_old_fn, pattern = "dummy")
      amtf <- grep(aidMeaFile, pattern = "dummy")
      if (length(atf) != 0 & length(amtf) != 0) {
        print(paste("Deleting dummy files!"))
        print(paste(aidMeaFile, " & ", aid_list_old_fn))
        unlink(aid_list_old_fn)
        unlink(aidMeaFile)
      }
    }

    if (DUMMY == TRUE & mea == FALSE) {
      atf <- grep(aid_list_old_fn, pattern = "dummy")
      if (length(atf) != 0) {
        print(paste("Deleting dummy file!"))
        print(aid_list_old_fn)
        unlink(aid_list_old_fn)
      }
    }

  }
}

vpr_category_create <- function(category, basepath) {
  #' Create a new category to be considered for classification after processing with VP
  #'
  #' creates empty directory structure to allow consideration of new category during vpr_manual_classification()
  #'
  #' @param category new category name to be added (can be a list of multiple category names)
  #' @param basepath path to folder containing autoid files (e.g., 'extdata/COR2019002/autoid')
  #'
  #' @return empty directory structure using new category name inside basepath
  #' @export
  #'
  #'
  #'
  #'
  for (i in seq_len(length(category))) {
    # create new category folder
    newcategorypath <- file.path(basepath, category[[i]])
    dir.create(newcategorypath)

    # create blank aid and aidmeas folders

    dir.create(paste0(newcategorypath, '/aid'), showWarnings = FALSE)
    dir.create(paste0(newcategorypath, '/aidmea'), showWarnings = FALSE)

  }
}
Echisholm21/vprr documentation built on Feb. 9, 2025, 4:21 p.m.