R/compute.R

Defines functions getPlottingData getLayout getGraph getPairwiseAlnScores getSequences initiateProject

Documented in getGraph getLayout getPairwiseAlnScores getPlottingData getSequences initiateProject

#' initiateProject
#'
#' @param dob 
#' @param given 
#' @param folder 
#' @param top 
#' @param step 
#' @param width 
#' @param genome_seq 
#' @param cores 
#'
#' @return project
#'
#' @export

initiateProject <- function(dob, given, folder = 'data', top = 100, step = 10, width = 100, genome_seq = NULL, cores = 5) {
    dob <- as.Date(dob, "%d/%m/%Y")
    y <- as.numeric(substr(dob, 1, 4))
    d <- substr(dob, 6, 10)
    path <- glue::glue("{folder}/{y}-{d}_{given}")
    if (!dir.exists('data')) { dir.create('data') }
    if (!dir.exists(path)) { dir.create(path) }
    l <- list(
        "folder" = folder,
        "project_path" = path, 
        "yob" = y, 
        "dob" = dob, 
        "given" = given, 
        "top" = top, 
        "step" = step, 
        "width" = width, 
        "genome_seq" = genome_seq, 
        "cores" = cores 
    )
    msg_success("Project initiated")
    msg_note(glue::glue("Folder: {folder}"))
    msg_note(glue::glue("D.O.B.: {dob}"))
    msg_note(glue::glue("Given name: {given}"))
    return(l)
}

#' getSequences
#'
#' @param project 
#' @param force 
#'
#' @return project
#'
#' @export

getSequences <- function(project, force = FALSE) {
    yob <- project[["yob"]]
    dob <- project[["dob"]]
    given <- project[["given"]]
    folder <- project[["folder"]]
    top <- project[["top"]]
    step <- project[["step"]]
    width <- project[["width"]]
    genome_seq <- project[["genome_seq"]]
    project_path <- project[["project_path"]]
    seqs_path <- glue::glue("{project_path}/seqs.rds")
    #
    if (!file.exists(seqs_path) | force) {
        if (is.null(genome_seq)) {
           msg_note(glue::glue("Importing genome sequence..."))
            # ------- Import genome sequence
            ah <- AnnotationHub::AnnotationHub()
            seq <- AnnotationHub::query(ah, 'Homo_sapiens.GRCh38.dna.primary_assembly.2bit')
            seq <- ah[['AH49723']]
            seq <- rtracklayer::import(seq)
            seq <- seq[GenomicRanges::width(seq) > 50000000]
        }
        else {
            seq = genome_seq
        }
        # ------- Define sequences specific to yob-dob
        msg_note(glue::glue("Extracting sequences corresponding to D.O.B: {dob}..."))
        set.seed(digest::digest2int(glue::glue("{yob}{dob}")))
        chr <- sample(names(seq), 1)
        loc <- sample(1:{width(seq[chr]) - 2000000}, 1)
        gr <- GRanges(seqnames = chr, ranges = IRanges(loc))
        gr <- resize(gr, fix = 'start', width = floor(365.25*top))
        gr <- slidingWindows(gr, step = step, width = width)
        gr <- gr[[1]]
        seqs <- seq[gr]
        msg_note(glue::glue("Saving sequences in {seqs_path}..."))
        saveRDS(seqs, seqs_path)
        msg_success(glue::glue("Sequences imported!"))
    } 
    else {
        msg_success(glue::glue("Sequences found in {seqs_path}"))
    }
    project[["seqs_path"]] <- seqs_path
    return(project)
}

#' getPairwiseAlnScores
#'
#' @param project 
#' @param force 
#'
#' @return project
#'
#' @export

getPairwiseAlnScores <- function(project, force = FALSE) {
    yob <- project[["yob"]]
    dob <- project[["dob"]]
    given <- project[["given"]]
    folder <- project[["folder"]]
    top <- project[["top"]]
    cores <- project[["cores"]]
    project_path <- project[["project_path"]]
    mat_path <- glue::glue("{project_path}/mat.rds")
    #
    if (!file.exists(mat_path) | force) {
        seqs <- readRDS(project[["seqs_path"]])
        # ------- Get pairwise aln scores
        substitution_matrix <- Biostrings::nucleotideSubstitutionMatrix(
            match = 1, mismatch = 0, baseOnly = FALSE, 
            type = "DNA"
        )[c('A', 'T', 'C', 'G', 'N'), c('A', 'T', 'C', 'G', 'N')]
        msg_note(glue::glue("Computing alignment matrix..."))
        mat <- parallel::mclapply(mc.cores = cores, 1:floor(length(seqs)/top*top), function(K) {
            aln <- Biostrings::pairwiseAlignment(
                seqs[1:floor(length(seqs)/top*top)], 
                seqs[K], substitutionMatrix = substitution_matrix
            )
            Biostrings::score(aln)
        })
        mat <- do.call(rbind, mat)
        rownames(mat) <- paste0('idx_', 1:nrow(mat))
        colnames(mat) <- paste0('idx_', 1:nrow(mat))
        mat[lower.tri(mat)] <- NA
        msg_note(glue::glue("Saving alignment matrix in {mat_path}..."))
        saveRDS(mat, mat_path)
        msg_success(glue::glue("Alignment done!"))
    }
    else {
        msg_success(glue::glue("Alignment matrix found in {mat_path}"))
    }
    project[["mat_path"]] <- mat_path
    return(project)
}

#' getGraph
#'
#' @param project 
#' @param force 
#'
#' @return project
#'
#' @export

getGraph <- function(project, force = FALSE) {
    yob <- project[["yob"]]
    dob <- project[["dob"]]
    given <- project[["given"]]
    folder <- project[["folder"]]
    top <- project[["top"]]
    step <- project[["step"]]
    project_path <- project[["project_path"]]
    graph_path <- glue::glue("{project_path}/graph.rds")
    #
    if (!file.exists(graph_path) | force) {
        mat <- readRDS(project[["mat_path"]])
        # ------- Get nodes / edges from mat
        `%>%` <- tidyr::`%>%`
        msg_note(glue::glue("Computing nodes..."))
        nodes <- data.frame(
            idx = paste0('idx_', 1:nrow(mat)),
            numidx = 1:nrow(mat), 
            year = round(seq(0, floor(nrow(mat)/365.25*step), length.out = nrow(mat)))
        )
        msg_note(glue::glue("Computing edges..."))
        edges <- mat %>% 
            as.data.frame() %>%
            tibble::rownames_to_column('from') %>% 
            tidyr::gather('to', 'weight', -from) %>% 
            dplyr::filter(!is.na(weight)) %>% 
            dplyr::filter(from != to) %>% 
            dplyr::filter(weight > 30)
        # ------- Turn it into a graph
        msg_note(glue::glue("Computing graph..."))
        set.seed(digest::digest2int(glue::glue("{yob}{dob}")))
        graph <- tidygraph::tbl_graph(
            nodes = nodes, 
            edges = edges,
            directed = FALSE
        )
        msg_note(glue::glue("Saving graph data in {graph_path}..."))
        saveRDS(graph, graph_path)
        msg_success(glue::glue("Graph data computed!"))
    }
    else {
        msg_success(glue::glue("Graph data found in {graph_path}"))
    }
    project[["graph_path"]] <- graph_path
    return(project)
}

#' getLayout
#'
#' @param project 
#' @param force 
#'
#' @return project
#'
#' @export

getLayout <- function(project, force = FALSE) {
    yob <- project[["yob"]]
    dob <- project[["dob"]]
    given <- project[["given"]]
    folder <- project[["folder"]]
    top <- project[["top"]]
    project_path <- project[["project_path"]]
    layout_path <- glue::glue("{project_path}/layout.rds")
    #
    if (!file.exists(layout_path) | force) {
        msg_note(glue::glue("Computing layout data..."))
        graph <- readRDS(project[["graph_path"]])
        lay <- ggraph::create_layout(graph, 'auto', weights = NA)
        msg_note(glue::glue("Saving layout data in {layout_path}..."))
        saveRDS(lay, layout_path)
        msg_success(glue::glue("Layout data computed!"))
    }
    else {
        msg_success(glue::glue("Layout data found in {layout_path}"))
    }
    project[["layout_path"]] <- layout_path
    return(project)
}

#' getPlottingData
#'
#' @param project 
#' @param force 
#'
#' @return project
#'
#' @export

getPlottingData <- function(project, force = FALSE) {
    `%>%` <- tidyr::`%>%`
    yob <- project[["yob"]]
    dob <- project[["dob"]]
    given <- project[["given"]]
    folder <- project[["folder"]]
    top <- project[["top"]]
    project_path <- project[["project_path"]]
    plotdf_path <- glue::glue("{project_path}/plotdf.rds")
    #
    if (!file.exists(plotdf_path) | force) {
        lay <- readRDS(project[["layout_path"]])
        # ------- Add noise and coloring 
        msg_note(glue::glue("Computing plotting data..."))
        set.seed(digest::digest2int(given))
        plotdf <- lay %>% 
            dplyr::select(idx, x, y, year) %>%
            dplyr::mutate(
                x_ = (x - min(x))/(max(x) - min(x)), 
                y_ = (y - min(y))/(max(y) - min(y)), 
                pertube = ambient::gen_simplex(x, y, frequency = 5) / 25,
                noise = ambient::gen_worley(x, y, value = 'distance', frequency = 5) / 25, 
                x = x_ - noise, 
                y = y_ - noise
            )
        msg_note(glue::glue("Saving plotting data in {plotdf_path}..."))
        saveRDS(plotdf, plotdf_path)
        msg_success(glue::glue("Plotting data computed!"))
    }
    else {
        msg_success(glue::glue("Plotting data found in {plotdf_path}"))
    }
    project[["plotdf_path"]] <- plotdf_path
    plotdf <- readRDS(plotdf_path)
    project[["data"]] <- plotdf
    return(project)
}
js2264/dnaRt documentation built on March 18, 2021, 2:38 p.m.