#' Checks genotype scores for heterozygosity
#'
#' Takes a vector of genotype scores (each made up of a two character string)
#' and a character class to match against, and returns a logical value depending
#' on whether the two characters match (\code{FALSE}) or not (\code{TRUE}).
#'
#' @param score A vector of genotype scores, each of which needs to be a two
#' character string. Numeric values are converted to strings
#' @param characterClass A regex character class to match against. If characters
#' in \code{score} do not exist in the character class, an error will be
#' returned
#' @param nonMatch c("false", "error") a string defining whether to return false
#' or throw an error if scores do not match characterClass.
#' @export
#' @examples
#' isHet("AA")
#' isHet(c("AA", "AB", "BB"))
#' isHet("GT", "[AB]")
isHet <- function(score, characterClass = "[[:alnum:]]", nonMatch = "false") {
if(class(score) == "numeric") warning("Numeric input will be coerced to character")
if(any(nchar(score) != 2)) stop("Expecting a string of length 2")
if(!nonMatch %in% c("false", "error")) stop("nonMatch should be either 'false' or 'error'")
if(nonMatch == "error" & any(grepl(sub("^(\\[)", "\\1^", characterClass), score))) {
stop("There are genotype scores that do not match the character class")
}
grepl(paste0("^(", characterClass, ")(?!\\1)"), score, perl = T)
}
#' Checks genotype scores for homozygosity
#'
#' @param score A vector of genotype scores, each of which needs to be a two
#' character string. Numeric values are converted to strings
#' @param characterClass A regex character class to match against. If characters
#' in \code{score} do not exist in the character class, an error will be
#' returned
#' @param nonMatch c("false", "error") a string defining whether to return false
#' or throw an error if scores do not match characterClass.
#' @export
isHomo <- function(score, characterClass = "[[:alnum:]]", nonMatch = "false") {
if(class(score) == "numeric") warning("Numeric input will be coerced to character")
if(any(nchar(score) != 2)) stop("Expecting a string of length 2")
if(!nonMatch %in% c("false", "error")) stop("nonMatch should be either 'false' or 'error'")
if(nonMatch == "error" & any(grepl(sub("^(\\[)", "\\1^", characterClass), score))) {
stop("There are genotype scores that do not match the character class")
}
grepl(paste0("(", characterClass, ")(?=\\1)"), score, perl = T)
}
#' Convert absolute scores to relative
#'
#' Works one marker at a time. Takes absolute genotype scores and converts to relative scores (AA/AB/BB to
#' match maternal/het/paternal).
#'
#' @param data A data frame
#' @param markerName Specification of the column containing the marker name
#' @param maternal Specification of the column containing the maternal score
#' @param paternal Specification of the column containing the paternal score
#' @param ... Specification of the columns containing progeny scores
#' @param missingString A string representing a missing score
#' @export
convert_rel <- function(data, markerName, maternal, paternal, ..., missingString = "--") {
markerName_ <- col_name(substitute(markerName))
maternal_ <- col_name(substitute(maternal))
paternal_ <- col_name(substitute(paternal))
if (n_dots(...) == 0) {
progeny_cols_ <- setdiff(colnames(data), c(markerName_, maternal_, paternal_))
} else {
progeny_cols_ <- unname(dplyr::select_vars(colnames(data), ...))
}
convert_rel_(data, markerName_, maternal_, paternal_, progeny_cols_, missingString)
}
#' Convert absolute scores to relative (standard-evaluation)
#'
#' This is a S3 generic.
#'
#' @param data A data frame
#' @param markerName_,maternal_,paternal_ Strings giving names of marker
#' maternal and paternal columns
#' @param progeny_cols_ Character vector giving column names of progeny scores
#' @param missingString A string representing a missing score
#' @keywords internal
#' @export
convert_rel_ <- function(data, markerName_, maternal_, paternal_, progeny_cols_, missingString = "--") {
UseMethod("convert_rel_")
}
#' @export
convert_rel_.data.frame <- function(data, markerName_, maternal_, paternal_, progeny_cols_,
missingString = "--") {
if (length(progeny_cols_) == 0) {
warning("No progeny columns were defined, return original data frame")
return(data)
}
progeny_idx <- match(progeny_cols_, names(data))
if (anyNA(progeny_idx)) {
missing_cols <- paste(progeny_cols_[is.na(progeny_idx)], collapse = ",")
stop("Unknown column names: ", missing_cols, call. = FALSE)
}
if (any(unlist(lapply(data[progeny_idx], class)) != "character")) {
stop("Progeny columns are not all character class")
}
maternal_idx <- match(maternal_, names(data))
if (!inherits(data[[maternal_]], "character")) {
stop("The maternal column is not of class character")
}
if (any(nchar(unlist(data[data[[maternal_]] != missingString, maternal_idx])) != 2)) {
stop("There are non-missing maternal scores that have more or less than two characters")
}
paternal_idx <- match(paternal_, names(data))
if (!inherits(data[[paternal_]], "character")) {
stop("The paternal column is not of class character")
}
if (any(nchar(unlist(data[data[[paternal_]] != missingString, paternal_idx])) != 2)) {
stop("There are non-missing paternal scores that have more or less than two characters")
}
data$converted <- apply(data, 1, function(x) {
converted <- convertScore(x[maternal_idx], x[paternal_idx], x[progeny_idx], missingString = missingString)
return(data.frame(t(converted)))
})
# data_out <- data %>%
# dplyr::select_(markerName_, "converted") %>%
# tidyr::unnest()
return(data)
}
#' Gets allele stats
#'
#' @param data A data.frame.
#' @param converted Specify the list column containing the converted allele calls (output from convert_rel)
#' @export
allele_stats <- function(data, converted) {
if(!inherits(data, "data.frame")) stop("data needs to be a data.frame")
converted_ <- col_name(substitute(converted))
if(!inherits(data[[converted_]], "list")) stop("The converted column should be a list")
allele_stats_(data, converted_)
}
#' Gets allele stats
#'
#' @param converted_ String defining the list column containing the converted allele calls (output from convert_rel)
#' @inheritParams allele_stats
#' @importFrom magrittr %>%
#' @export
allele_stats_ <- function(data, converted_) {
if(!inherits(data, "data.frame")) stop("data needs to be a data.frame")
if(!inherits(data[[converted_]], "list")) stop("The converted column should be a list")
converted_df <- data %>%
dplyr::select_(converted_) %>%
tidyr::unnest()
data$allele_stats <- apply(converted_df, 1, function(x) {
scores <- sum(!is.na(x))
missing <- (length(x) - scores)/length(x)
if(scores == 0) {
AA <- AB <- BB <- minor_allele <- minor_allele_freq <- NA
} else {
AA <- sum(x == "AA")/scores
AB <- sum(x == "AB")/scores
BB <- sum(x == "BB")/scores
if(AA == BB) {
minor_allele <- "equal"
minor_allele_freq <- AA
} else {
minor_allele <- c("AA", "BB")[which.min(c(AA, BB))]
minor_allele_freq <- c(AA, BB)[which.min(c(AA, BB))]
}
}
data_out <- data.frame(AA = AA, AB = AB, BB = BB,
minor_allele = minor_allele,
minor_allele_freq = minor_allele_freq,
missing = missing)
}) %>%
suppressWarnings()
return(data)
}
#' Checks informativeness of parents
#'
#' @param data A data.frame where each row is a marker score
#' @param maternal,paternal Strings giving names of maternal, paternal columns
#' @param missingString A string defining missing scores
#' @importFrom magrittr %>%
check_parents_ <- function(data, maternal, paternal, missingString = "--") {
if(length(dplyr::setdiff(c(maternal, paternal), colnames(data))) != 0) stop("check names for parents in check_parents_")
other_cols <- dplyr::setdiff(colnames(data), c(maternal, paternal))
mutate_call <- lazyeval::interp(~ ifelse(any(missingString %in% c(maternal, paternal)),
"missing",
ifelse(any(isHet(c(maternal, paternal))),
"heterozygous",
ifelse(maternal == paternal,
"nonseg",
ifelse(all(isHomo(c(maternal, paternal))),
"informative", "unknown")))),
# missingString = as.name(missingString),
maternal = as.name(maternal),
paternal = as.name(paternal))
data_out <- data %>%
# dplyr::select_(maternal, paternal) %>%
dplyr::rowwise() %>%
dplyr::mutate_(.dots = stats::setNames(list(mutate_call), "parent_status")) %>%
dplyr::select_(maternal, paternal, "parent_status", .dots = other_cols)
return(data_out)
}
#' Converts a single marker, typically a row in a data.frame.
#'
#' @param maternal,paternal A two character string, or missingString value.
#' @param progeny A (named) character vector of progeny scores.
#' @param missingString A string defining missing scores.
#'
convertScore <- function(maternal, paternal, progeny, missingString = "--") {
if(is.null(names(progeny))) names(progeny) <- paste0("progeny", 1:length(progeny))
progeny.out <- vector(mode = "character", length = length(progeny)) %>%
stats::setNames(names(progeny))
if(any(isHet(c(maternal, paternal))) |
any(grepl(missingString, c(maternal, paternal))) |
maternal == paternal) {
progeny.out[1:length(progeny)] <- NA
} else {
#Create possible het codes from paternal
#This is to avoid a progeny call that doesn't match parentals being called AB
hetmp <- paste0(sub("^([[:alnum:]]).", "\\1", maternal), sub("^([[:alnum:]]).", "\\1", paternal))
hetpm <- paste0(sub("^([[:alnum:]]).", "\\1", paternal), sub("^([[:alnum:]]).", "\\1", maternal))
progeny.out[progeny == maternal] <- "AA"
progeny.out[progeny == paternal] <- "BB"
progeny.out[progeny == missingString] <- missingString
progeny.out[progeny %in% c(hetmp, hetpm)] <- "AB"
progeny.out[progeny.out == ""] <- NA
}
return(progeny.out)
}
#' Convert R/qtl map into a tidy data.frame
#'
#' Similar to pull.map, but returns a tidy data.frame, tibble, tidy_gen_map.
#'
#' @param obj An object of either class cross, map or mpcross
#'
#' @export
#' @importFrom magrittr %>%
map2df <- function(obj) {
UseMethod("map2df")
}
#' @export
map2df.cross <- function(obj) {
mapdf_list <- lapply(obj$geno, function(x) {
out <- as.data.frame(x$map)
out$markerName <- rownames(out)
return(out)
})
bind_map_list(mapdf_list)
}
#' @export
map2df.map <- function(obj) {
mapdf_list <- lapply(obj, function(x) {
class(x) <- "numeric"
out <- as.data.frame(x)
out$markerName <- rownames(out)
return(out)
})
bind_map_list(mapdf_list)
}
#' @export
map2df.mpcross <- function(obj) {
map2df(obj$map)
}
#' Bind a list of mapdfs together
#'
#' @param mapdf_list A list of map dfs
#'
#' @return A tidy_gen_map
bind_map_list <- function(mapdf_list) {
mapdf <- dplyr::bind_rows(mapdf_list, .id = "lg")
names(mapdf)[2] <- "mapdist"
mapdf <- tibble::as_tibble(mapdf)
class(mapdf) <- c(class(mapdf), "tidy_gen_map")
return(mapdf)
}
#' Convert R/qtl genotyping data into data.frame
#'
#' Similar to map2df, but for the genotying values.
#'
#' @param map An R/qtl cross object
#' @export
geno2df <- function(map) {
genodf <- lapply(map$geno, function(x) {
out <- as.data.frame(x$data)
markers <- colnames(out)
out$id <- rownames(out)
out.g <- tidyr::gather_(out, "markerName", "score", markers)
return(out.g)
})
genodf <- dplyr::bind_rows(genodf, .id = "lg")
return(genodf)
}
#' Take output from /code{join2maps()} and a map object to be reversed, and
#' returns the map object with appropriate groups reversed.
#'
#' @param maps.comp_df Output from /code{join2maps()}.
#' @param refmapid The reference map name to identify columns in maps.comp_df.
#' @param revmapid The name of the map to be reversed, used to identify columns
#' in maps.comp_df.
#' @param revmap a cross object which is to have linkage groups reversed.
#' @export
#' @importFrom magrittr %>%
revmaplgs <- function(maps.comp_df, refmapid, revmapid, revmap) {
refmapdist_ <- paste0(deparse(substitute(refmapid)), "_mapdist")
map2dist_ <- paste0(deparse(substitute(revmapid)), "_mapdist")
refmaplg_ <- paste0(deparse(substitute(refmapid)), "_lg")
map2lg_ <- paste0(deparse(substitute(revmapid)), "_lg")
dist2_var <- "dist2"
dots_idx <- list(lazyeval::interp(~order(var), var = as.name(refmapdist_)))
dots_cor <- list(lazyeval::interp(~ cor(var1, var2),
var1 = as.name("index"),
var2 = as.name(map2dist_)))
dots_rev <- list(lazyeval::interp(~var < 0, var = as.name("rev")))
revs <- maps.comp_df %>%
dplyr::group_by_(map2lg_) %>%
dplyr::arrange_(refmapdist_) %>%
dplyr::mutate_(.dots = stats::setNames(dots_idx, "index")) %>%
dplyr::summarise_(.dots = stats::setNames(dots_cor, "rev")) %>%
dplyr::filter_(.dots = dots_rev)
outmap <- revmap(revmap, revs[[1]])
return(outmap)
}
#' Provide the 'mirror image' of a vector, especially of positions.
#'
#' @param vec A (possibly named) numeric vector.
#' @export
#' @examples
#' distvec <- c(A = 0, B = 3, C = 10)
#' revvec(distvec)
revvec <- function(vec) {
if(!is.numeric(vec)) stop("The vector needs to be numeric")
mid <- (min(vec, na.rm = T) + max(vec, na.rm = T))/2
newvec <- vec + 2 * (mid - vec)
newvec <- rev(newvec)
return(newvec)
}
#' Reverse particular linkage groups in a map from a cross object
#'
#' @param map A cross objects
#' @param revgroups The linkage groups to be reversed
revmap <- function(map, revgroups) {
if(any(!revgroups %in% names(map$geno))) stop("The supplied linkage groups do not all exist")
for(i in revgroups) {
map$geno[[i]]$data <- map$geno[[i]]$data[,ncol(map$geno[[i]]$data):1]
map$geno[[i]]$map <- revvec(map$geno[[i]]$map)
}
return(map)
}
#' Join two map dfs
#'
#' Take two maps that have been converted to df (possibly through
#' /code{map2df()}) and join them.
#' @param map1 The data.frame for the first map.
#' @param map2 The data.frame for the second map.
#' @param map1markerName An unquoted string defining the name for the marker
#' column in map1.
#' @param map2markerName An unquoted string defining the name for the marker
#' column in map2.
#' @param map1distName An unquoted string defining the name for the distance
#' column in map1.
#' @param map2distName An unquoted string defining the name for the distance
#' column in map2.
#' @param map1lgName An unquoted string defining the name for the linkage group
#' column in map1.
#' @param map2lgName An unquoted string defining the name for the linkage group
#' column in map2.
#' @export
#' @importFrom magrittr %>%
join2maps <- function(map1, map2, map1markerName, map2markerName, map1distName, map2distName, map1lgName, map2lgName) {
map1name <- deparse(substitute(map1))
map1markerName_ <- deparse(substitute(map1markerName))
map1distName_ <- deparse(substitute(map1distName))
map1lgName_ <- deparse(substitute(map1lgName))
map2name <- deparse(substitute(map2))
map2markerName_ <- deparse(substitute(map2markerName))
map2distName_ <- deparse(substitute(map2distName))
map2lgName_ <- deparse(substitute(map2lgName))
map1.r <- map1 %>%
dplyr::rename_(.dots = with(map1, stats::setNames(c(map1markerName_, map1distName_, map1lgName_),
c("markerName", paste(map1name, c("mapdist", "lg"), sep = "_")))))
map2.r <- map2 %>%
dplyr::rename_(.dots = with(map2, stats::setNames(c(map2markerName_, map2distName_, map2lgName_),
c("markerName", paste(map2name, c("mapdist", "lg"), sep = "_")))))
out <- map1.r %>%
dplyr::left_join(map2.r)
return(out)
}
#' Take output from /code{join2maps()} and put into long form for plotting
#'
#' Refers to a reference linkage group for facetting, and one for joining,
#' this avoids errors when a marker exists twice in one of the maps.
#' @param df A data.frame (output from /code{join2maps()}) of joined map data.
#' @param reflg_facet An unquoted string. The name for the reference linkage group identifier for
#' faceting.
#' @param reflg_join An unquoted string. The name for the reference linkage group identifier for
#' joining.
#' @param markerName An unquoted string. The name of the variable giving marker names.
#' @export
#' @importFrom magrittr %>%
longmaps <- function(df, reflg_facet, reflg_join, markerName = markerName) {
reflg_facet_ <- deparse(substitute(reflg_facet))
reflg_join_ <- deparse(substitute(reflg_join))
markerName_ <- deparse(substitute(markerName))
df$markerName.lg <- paste0(df[[markerName_]], "_", df[[reflg_join_]])
lgs.g <- df %>%
dplyr::select_(reflg_facet_, reflg_join_, "markerName.lg")
names(lgs.g) <- gsub("_.*", "", names(lgs.g))
lgs.g <- lgs.g %>%
tidyr::gather_("source", "lg", names(lgs.g)[!grepl("markerName.lg", names(lgs.g))])
mapdist_id <- names(df)[grepl("_mapdist", names(df))]
dist.g <- df %>%
dplyr::select_(.dots = c(mapdist_id, "markerName.lg"))
names(dist.g) <- gsub("_.*", "", names(dist.g))
dist.g <- dist.g %>%
tidyr::gather_("source", "mapdist",
names(dist.g)[!grepl("markerName.lg", names(dist.g))])
lgs.c <- lgs.g %>%
dplyr::inner_join(dist.g)
reflg_facet_ <- deparse(substitute(reflg_facet))
out.df <- df %>%
dplyr::select_(reflg_facet_, "markerName.lg") %>%
dplyr::left_join(lgs.c)
out.df <- out.df[!is.na(out.df$lg),]
}
#' Take a linkage group from a cross object, convert into long form and sort
#' according to genotype scores for a marker of interest.
#'
#' @param lgObject A linkage group from a cross object. Usually in the form
#' cross$geno$name.
#' @param markerOfInterest A string identifying the marker by which the data
#' should be sorted.
#' @export
#' @importFrom magrittr %>%
genoComp <- function(lgObject, markerOfInterest) {
df <- as.data.frame(lgObject$data)
df$Genotype <- rownames(lgObject$data)
dfg <- df %>%
tidyr::gather_("markerName", "score",
dplyr::select_vars_(names(df),
names(df),
exclude = "Genotype"))
ord <- dfg %>%
dplyr::filter_("markerName" == markerOfInterest) %>%
dplyr::arrange_("score") %>%
dplyr::mutate_(ord = order(as.name("score"))) %>%
dplyr::arrange(ord)
dfg$Gen.sort <- factor(dfg$Genotype, levels = ord$Genotype, ordered = TRUE)
dfg$Mark.sort <- factor(dfg$markerName, levels = names(lgObject$map), ordered = TRUE)
return(dfg)
}
#' Converts a character vector of genotype scores into two classes - one of which represents one homozygous state, and the other represents the other homozygous state and all heterozygotes
#' @param vec A character vector of genotype scores, each of which is made of two characters. An na_string may also be present
#' @param level Either 1 or 2, indicating which homozygous state is to be compared to the other state/heterozygotes. Default is 1
#' @param na_string a string representing NA. Default is "u"
#' @export
make_nptest_classes <- function(vec, level = 1, na_string = "u") {
stopifnot(class(vec) == "character")
vec[vec == na_string] <- NA
vec_fac <- as.factor(vec)
if(any(genomap::isHet(vec)) & length(levels(vec_fac)) > 3) {
return(rep("too many", length(vec)))
#stop("Too many levels in vec")
}
if(!any(genomap::isHet(vec)) & length(levels(vec_fac)) > 2) {
return(rep("too many", length(vec)))
#stop("Too many levels in vec")
}
if(length(levels(vec_fac)) < 2) {
return(rep(NA_character_, length(vec)))
}
stopifnot(length(levels(vec_fac)) >= 2)
if(level == 1) {
main_level <- levels(vec_fac)[1]
gen_levels <- c(main_level, paste0("not_", main_level))
if(any(genomap::isHet(vec))) {
out_vec <- as.numeric(vec_fac)
out_vec[out_vec > 1] <- 2
}
if(!any(genomap::isHet(vec))) {
out_vec <- as.numeric(vec_fac)
}
}
if(level == 2) {
if(any(genomap::isHet(vec))) {
if(length(levels(vec_fac)) < 3) {
return(rep(NA_character_, length(vec)))
} else {
main_level <- levels(vec_fac)[3]
gen_levels <- c(main_level, paste0("not_", main_level))
out_vec <- as.numeric(vec_fac)
out_vec[out_vec == 1] <- 2
out_vec[out_vec == 3] <- 1
}
}
if(!any(genomap::isHet(vec))) {
main_level <- levels(vec_fac)[2]
gen_levels <- c(main_level, paste0("not_", main_level))
out_vec <- as.numeric(vec_fac)
out_vec[out_vec == 1] <- 3
out_vec[out_vec == 2] <- 1
out_vec[out_vec == 3] <- 2
}
}
out_vec <- factor(out_vec, labels = gen_levels)
return(as.character(out_vec))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.