R/fill_empties_Q.R

Defines functions fill_empties_Q

Documented in fill_empties_Q

#' Randomly select a row prototype to fill an empty row prototype with
#'
#' @param data The data being biclustered. Must to be a data matrix with only numbers and missing values in the data set. It should have row names and column names.
#' @param obj A matrix for row clusters, typically named Q
#' @param row_min_num Minimum row prototype size in order to be eligible to be chosen when filling an empty row prototype. Default is 10.
#' @param row_num_to_move Number of rows to remove from the sampled prototype to put in the empty row prototype. Default is 5.
#'
#' @importFrom utils head
#' @return A matrix for row clusters, i.e., a Q matrix.

fill_empties_Q <- function(data, obj,
                                                    row_min_num = 10,
                                                    row_num_to_move = 5) {
  data <- as.matrix(data)

  empty_protos <- which(colSums(obj) == 0)


  unassigned <- which(rowSums(obj) == 0)

  if(length(unassigned) == 0 & length(empty_protos) == 0) {

    return(obj)

  } else if(length(unassigned) == 0 & length(empty_protos) > 0) {

    num_to_fill <- length(empty_protos)

    if(all(colSums(obj, na.rm = TRUE) < row_min_num)) {
      stop(
        paste0(
          "No row groups with at least row_min_num = ", row_min_num,
          " rows. Specify a smaller row_min_num value."
        )
      )
    }

    for(j in 1:num_to_fill) {

      protos_to_choose_from <- which(colSums(obj, na.rm = TRUE) >= row_min_num)

      num_in_each <- colSums(matrix(obj[, protos_to_choose_from]))

      sampling_frame <- rep(protos_to_choose_from, num_in_each)

      proto_to_use <- sample(sampling_frame, 1)

      dummy_var <- rep(0, ncol(obj))
      dummy_var[empty_protos[j]] <- 1

      dummy_var <- dummy_var

      chosen_proto_members <- which(obj[, proto_to_use] == 1)


      mean_row <- mean(rowMeans(as.matrix(data[chosen_proto_members,]), na.rm = TRUE), na.rm = TRUE)
      row_means <- rowMeans(as.matrix(data[chosen_proto_members,]), na.rm = TRUE)

      similarity <- (row_means - mean_row) ^ 2
      to_move <- which(similarity %in% head(sort(similarity, decreasing = TRUE),
                                             n = row_num_to_move))

      for(i in 1:row_num_to_move) {
        if(is.na(chosen_proto_members[to_move[i]])) {
          stop(
            "No row clusters with at least row_num_to_move - 1 rows. Specify a smaller row_num_to_move value."
          )
        }
        obj[chosen_proto_members[to_move[i]],] <- dummy_var
      }

    }

    return(obj)
  } else {
    num_to_assign <- length(unassigned)

    for(i in 1:num_to_assign) {
      empty_protos <- colSums(obj)

      obj[unassigned[i], which.min(empty_protos)] <- 1
    }

    return(obj)
  }

}

Try the biclustermd package in your browser

Any scripts or data that you put into this service are public.

biclustermd documentation built on June 17, 2021, 5:11 p.m.