R/utils.R

Defines functions sample_vec gds_node_exists concat_df_cols concat_list sequence_along_chr_unscaled sequence_along_chr_scaled valid_colors valid_colors_ set_highlight_col set_chr_col remove_0_pval replace_0_pval set_thin_logical remove_na valid_chr preprocess_arg_check

# check preprocessing arguments
preprocess_arg_check <- function(
  x, chromosome, signif, signif.col, pval.colname, chr.colname, pos.colname, preserve.position
) {
  preprocess_checklist <- list(signif.col = signif.col)
  # check significance cutoff exists
  if (length(signif) < 1) stop("At least one significance threshold should be provided.")

  # check that significance cutoff is numeric
  if (!is.numeric(signif)) stop("signif should be a numeric vector.")

  # check signif.col
  if (is.null(signif.col)) {
    preprocess_checklist$signif.col <- rep("grey", length(signif))
    preprocess_checklist$signif.col[which.max(-log10(signif))] <- "black"
  } else if (!all(valid_colors(signif.col))) {
    warning("invalid signif.col colors... using default colors.")
    preprocess_checklist$signif.col <- rep("grey", length(signif))
    preprocess_checklist$signif.col[which.max(-log10(signif))] <- "black"
  } else if (length(signif) != length(signif.col)) {
    warning("Length of signif and signif.col do not match.")
    if (length(signif.col) > length(signif)) {
      signif.col <- signif.col[1:length(signif)]
    } else {
      signif.col <- rep(signif.col, length.out = length(signif))
    }
    preprocess_checklist$signif.col <- signif.col
  }

  # check that the supplied column names exist
  if (!all(c(is.character(pval.colname), is.character(chr.colname), is.character(pos.colname)))) {
    stop("Column names should be characters.")
  }
  if (!all(c(pval.colname, chr.colname, pos.colname) %in% colnames(x))) {
    tmp <- c(pval.colname, chr.colname, pos.colname)
    stop(sprintf("Column name(s) not in data: %s.", paste0(tmp[!(tmp %in% colnames(x))], collapse = ", ")))
  }
  if (pval.colname == "log10pval") {
    stop("Choose a different name for pvalue column name.")
  }

  # check that the supplied chromosomes exist
  if (!is.null(chromosome)) {
    valid_chr(x, chromosome, chr.colname)
  }

  # check that the values in p-value column are valid
  if (any(x[[pval.colname]] < 0, na.rm = TRUE) | any(x[[pval.colname]] > 1, na.rm = TRUE)) stop("p.value is a probability between 0 and 1.")

  # check that column names are valid
  if (!is.numeric(x[[pval.colname]])) stop(pval.colname, " should be a numeric column.")
  if (!is.numeric(x[[pos.colname]])) stop(pos.colname, " should be a numeric column.")

  # check that values in p value column are correct
  if (any(x[[pval.colname]] < 0, na.rm = TRUE) | any(x[[pval.colname]] > 1, na.rm = TRUE)) stop("p.value is a probability between 0 and 1.")

  if (length(preserve.position) != 1 | !is.logical(preserve.position)) {
    stop("preserve.position should be TRUE or FALSE.")
  }

  return(preprocess_checklist)
}

# check valid chromosome argument
valid_chr <- function(x, chromosome, chr.colname) {
  if (length(chromosome) != 1) stop("Only 1 chromosome should be specified")
  if (!(chromosome %in% x[[chr.colname]])) stop("The supplied chromosome does not exist.")
  invisible()
}

# remove entries where position, chromosome, or pvalue is missing
remove_na <- function(x, chr.colname, pos.colname, pval.colname) {
  na_remove <- which(is.na(x[[chr.colname]]) | is.na(x[[pos.colname]]) | is.na(x[[pval.colname]]))
  if (length(na_remove) > 0) {
    warning("Removed ", length(na_remove), " rows due to missing chromosome/position/pvalue.\n")
    x <- x[-na_remove,]
  }
  if (nrow(x) < 1) {
    stop("Empty rows after omitting missing chromosome/position/pvalue.\n")
  }
  return(x)
}

set_thin_logical <- function(thin, chromosome) {
  if (is.null(thin)) {
    if (is.null(chromosome)) {
      return(TRUE)
    } else {
      return(FALSE)
    }
  }
  return(thin)
}

# TEMPORARY: replace entries where p-value is zero with the minimum
# used by manhattan data preprocess andqqunif
replace_0_pval <- function(x) {
  zero_pval <- which(x == 0)
  if (length(zero_pval) > 0) {
    warning("Replacing p-value of 0 with the minimum.")
    x[zero_pval] <- min(x[-zero_pval], na.rm = TRUE)
  }
  return(x)
}

# remove entries where p-value is zero
remove_0_pval <- function(x) {
  zero_pval <- which(x == 0)
  if (length(zero_pval) > 0) {
    warning("Removing p-value of 0.")
    x <- x[-zero_pval]
  }
  return(x)
}

set_chr_col <- function(chr.col, nchr, chr.order) {
  if (is.null(chr.col)) {
    chr.col <- stats::setNames(rep_len(RColorBrewer::brewer.pal(8, "Dark2"), nchr), chr.order)
  } else {
    if (!all(valid_colors(chr.col))) {
      warning("Invalid chr.col colors. Using default colors")
      chr.col <- stats::setNames(rep_len(RColorBrewer::brewer.pal(8, "Dark2"), nchr), chr.order)
    }
    if (!is.null(names(chr.col))) {
      if (!all(chr.order %in% names(chr.col))) {
        stop("names(chr.col) is missing values from chr.colname.")
      }
    } else {
      if (nchr > length(chr.col)) {
        warning("chr.col is recycled to match chr.order length")
        chr.col <- rep(chr.col, length.out = nchr)
        names(chr.col) <- chr.order
      } else {
        warning(paste0("Using first ", nchr, " colors for chr.col."))
        chr.col <- chr.col[1:nchr]
        names(chr.col) <- chr.order
      }
    }
  }
  return(chr.col)
}

set_highlight_col <- function(x, highlight.colname, highlight.col) {
  if (!(highlight.colname %in% colnames(x))) stop(paste0(highlight.colname, " not in data."))
  highlight.levels <- unique(x[[highlight.colname]])
  if (is.null(highlight.col)) {
    highlight.col <- RColorBrewer::brewer.pal(length(highlight.levels), "Dark2")
    names(highlight.col) <- as.character(highlight.levels)
  } else {
    if (!all(valid_colors(highlight.col))) stop("Please provide valid colors.")
    if (!is.null(names(highlight.col))) {
      if (!all(highlight.levels %in% names(highlight.col))) {
        stop("names(highlight.col) is missing values from column ", highlight.colname, ".")
      }
    } else {
      if (length(highlight.levels) > length(highlight.col)) {
        warning("highlight.col is recycled to match unique values of ", highlight.colname, ".")
        highlight.col <- rep(highlight.col, length.out = length(highlight.levels))
        names(highlight.col) <- highlight.levels
      } else if (length(highlight.levels) < length(highlight.col)) {
        warning("Using first ", length(highlight.levels), " colors.")
        highlight.col <- highlight.col[1:length(highlight.levels)]
        names(highlight.col) <- highlight.levels
      } else {
        names(highlight.col) <- highlight.levels
      }
    }
  }

  return(highlight.col)
}

# check that a character string is a valid color
valid_colors_ <- function(clr) {
  tryCatch(is.matrix(grDevices::col2rgb(clr)), error = function(clr) FALSE)
}

# vectorized version of valid_colors_
valid_colors <- function(clr) {
  vapply(clr, valid_colors_, logical(1))
}

# create spaced points of length(pos)
sequence_along_chr_scaled <- function(pos) {
  pos <- pos - min(pos)
  if (max(pos) != 0) {
    return(pos / max(pos))
  } else {
    return(pos)
  }
}

# create an equally spaced points of length(pos)
sequence_along_chr_unscaled <- function(pos) {
  if (length(pos) == 1) {
    return(1/2)
  } else if (length(pos) > 1) {
    return(seq(from = 0, to = 1, length.out = length(pos)))
  } else {
    stop("Invalid pos")
  }
}

# concatenate elements across the list
concat_list <- function(dflist, concat_char = "/") {

  if (!all(unlist(lapply(dflist, is.vector)) | unlist(lapply(dflist, is.factor)))) {
    stop("All elements in the list should be a vector.")
  }

  check_lengths <- lapply(dflist, length)
  if (length(unique(unlist(check_lengths))) != 1) {
    stop("Length of all list elements should be equal.")
  }

  if (!is.character(concat_char)) stop("concat_char should be of character type.")

  if (length(concat_char) > 1) {
    warning("concat_char should be a character vector of length 1. Using first element.")
    concat_char <- concat_char[1]
  } else if (length(concat_char) < 1) {
    warning("concat_char should be a character of length 1. Using \"/\" by default.")
  }

  if (nchar(concat_char) < 1) {
    warning("concat_char should be a character of length 1. Using \"/\" by default.")
  }

  dflist <- lapply(dflist, function(x) {
    x <- as.character(x)
    x[is.na(x)] <- ""
    return(x)
  })
  dflist <- unname(dflist)
  dflist$sep <- concat_char
  dflist <- do.call(paste, dflist)
  dflist <- gsub(paste0("(", concat_char, ")", "+$"), "", dflist)
  dflist <- gsub(paste0("^", "(", concat_char, ")", "+"), "", dflist)
  return(dflist)
}

# concatenate columns of data.frame and produce a character vector
concat_df_cols <- function(df, concat_char = "/") {
  if (!is.data.frame(df)) stop("df should be a data.frame.")
  if (nrow(df) == 0) {
    return("")
  }
  if (length(concat_char) > 1) {
    warning("concat_char should be a character vector of length 1. Using first element.")
    concat_char <- concat_char[1]
  } else if (length(concat_char) < 1) {
    warning("concat_char should be a character of length 1. Using \"/\" by default.")
  }
  if (nchar(concat_char) < 1) {
    warning("concat_char should be a character of length 1. Using \"/\" by default.")
  }

  return(concat_list(as.list(df), concat_char))

}

# check that gds node exists
gds_node_exists <- function(gds, nodes) {
  all(nodes %in% gdsfmt::ls.gdsn(gds, recursive = TRUE))
}

# sample from a vector iff the number to sample from is below the length of x
sample_vec <- function(x, n) {
  if (length(x) == 1) {
    return(x)
  } else if (length(x) <= n) {
    return(x)
  } else {
    return(sample(x, size = n, replace = FALSE))
  }
}
leejs-abv/ggmanh documentation built on March 25, 2024, 11:17 a.m.