From: https://github.com/tidyverse/tidyr/issues/613.
require(tidyverse) require(data.table) data.test <- matrix( data = sample( x = c(0L, 1L, 2L, NA_integer_),#the genotypes size = 2e+07, replace = TRUE, prob = c(0.8, 0.10, 0.05, 0.05) ), nrow = 20000,#number of SNPs/markers ncol = 1000,#number of samples dimnames = list(rownames = seq(1, 20000, 1), colnames = seq(1, 1000, 1)) ) %>% tibble::as_tibble(x = ., rownames = "MARKERS")
library("cdata") library("rqdatatable") packageVersion("data.table") packageVersion("tidyr") packageVersion("dplyr") packageVersion("cdata") packageVersion("rqdatatable")
data.test <- data.frame(data.test)
test1: data.table::melt.data.table
system.time( test1 <- data.table::as.data.table(data.test) %>% data.table::melt.data.table( data = ., id.vars = "MARKERS", variable.name = "INDIVIDUALS", value.name = "GENOTYPES", variable.factor = FALSE) ) # reported: #~0.41sec
test1 <- orderby(test1, qc(MARKERS, INDIVIDUALS, GENOTYPES))
test2: tidyr::gather
system.time( test2 <- tidyr::gather( data = data.test, key = "INDIVIDUALS", value = "GENOTYPES", -MARKERS) ) # reported: #~0.39sec
test2 <- orderby(test2, qc(MARKERS, INDIVIDUALS, GENOTYPES)) stopifnot(isTRUE(all.equal(test1, test2)))
test 2b: reverse
system.time({ test2b <- tidyr::spread(test2, key = INDIVIDUALS, value = GENOTYPES) }) test2b <- orderby(test2b, colnames(test2b)) data.test <- orderby(test2b, colnames(data.test)) stopifnot(isTRUE(all.equal(data.test, test2b)))
test3: latest tidyr::pivot_longer
run_pivot_longer <- exists('pivot_longer', where = 'package:tidyr', mode = 'function')
system.time( test3 <- tidyr::pivot_longer( df = data.test, cols = -MARKERS, names_to = "INDIVIDUALS", values_to = "GENOTYPES") ) # reported: #~90sec !!!
test3 <- orderby(test3, qc(MARKERS, INDIVIDUALS, GENOTYPES)) stopifnot(isTRUE(all.equal(test1, test3)))
test 4: cdata::unpivot_to_blocks() (with data.table)
system.time({ cT <- build_unpivot_control( nameForNewKeyColumn = "INDIVIDUALS", nameForNewValueColumn = "GENOTYPES", columnsToTakeFrom = setdiff(colnames(data.test), c("MARKERS", "INDIVIDUALS", "GENOTYPES"))) layout <- rowrecs_to_blocks_spec( cT, recordKeys = "MARKERS", allow_rqdatatable = TRUE) print(layout$allow_rqdatatable) test4 <- layout_by(layout, data.test) })
test4 <- orderby(test4, qc(MARKERS, INDIVIDUALS, GENOTYPES)) stopifnot(isTRUE(all.equal(test1, test4)))
Slow.
system.time({ inv_layout <- t(layout) print(inv_layout$allow_rqdatatable) back4 <- layout_by(inv_layout, test4) })
back4 <- orderby(back4, colnames(back4)) data.test <- orderby(back4, colnames(data.test)) stopifnot(isTRUE(all.equal(data.test, back4)))
test 5: cdata::unpivot_to_blocks() (without data.table)
Slow.
system.time({ cT <- build_unpivot_control( nameForNewKeyColumn = "INDIVIDUALS", nameForNewValueColumn = "GENOTYPES", columnsToTakeFrom = setdiff(colnames(data.test), c("MARKERS", "INDIVIDUALS", "GENOTYPES"))) layout <- rowrecs_to_blocks_spec( cT, recordKeys = "MARKERS", allow_rqdatatable = FALSE) print(layout$allow_rqdatatable) test5 <- layout_by(layout, data.test) })
test5 <- orderby(test5, qc(MARKERS, INDIVIDUALS, GENOTYPES)) stopifnot(isTRUE(all.equal(test1, test5)))
Slow.
system.time({ inv_layout <- t(layout) print(inv_layout$allow_rqdatatable) back5 <- layout_by(inv_layout, test5) })
back5 <- orderby(back5, colnames(back5)) data.test <- orderby(back5, colnames(data.test)) stopifnot(isTRUE(all.equal(data.test, back5)))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.