Nothing
library(nc)
library(testthat)
context("df")
source(system.file("test_engines.R", package="nc", mustWork=TRUE), local=TRUE)
if(requireNamespace("stringi"))test_engines("default/specified engine respected", {
subject.df <- data.frame(f="foo", b="bar")
match.dt <- capture_first_df(
subject.df,
f=list(icu="[\\p{Letter}]"),#only works with ICU
b=list(re2=".(?R)?", engine="PCRE"),#only works with PCRE
engine="ICU")
expect_identical(match.dt[["icu"]], "f")
expect_identical(match.dt[["re2"]], "bar")
})
subject.df <- data.frame(
JobID=c(
"13937810_25",
"13937810_25.batch",
"13937810_25.extern",
"14022192_[1-3]",
"14022204_[4]"),
subject=c(
ten="chr10:213,054,000-213,055,000",
chrNA="chrNA:111,000-222,000",
no.match="foo bar",
missing=NA,
two="chr1:110-111 chr2:220-222"),
stringsAsFactors=FALSE)
range.pattern <- list(
"\\[",
task1="[0-9]+", as.integer,
"(?:-",#begin optional end of range.
taskN="[0-9]+", as.integer,
")?", #end is optional.
"\\]")
test_engines("capture_first_df returns data.table", {
match.dt <- capture_first_df(
subject.df,
JobID=list(
job="[0-9]+", as.integer,
"_",
"(?:",#begin alternate
task="[0-9]+", as.integer,
"|",#either one task(above) or range(below)
range.pattern,
")",#end alternate
"(?:[.]",
type=".*",
")?"),
subject=list(
nomatch.error=FALSE,
chrom="chr.*?",
":",
chromStart=".*?",
"-",
chromEnd="[0-9,]*"))
expect_is(match.dt, "data.table")
expect_identical(names(match.dt), c(
"JobID", "subject",
"job", "task", "task1", "taskN", "type",
"chrom", "chromStart", "chromEnd"))
expect_identical(match.dt$job, as.integer(c(
13937810, 13937810, 13937810, 14022192, 14022204)))
expect_identical(match.dt$task, as.integer(c(
25, 25, 25, NA, NA)))
expect_identical(match.dt$task1, as.integer(c(
NA, NA, NA, 1, 4)))
expect_identical(match.dt$taskN, as.integer(c(
NA, NA, NA, 3, NA)))
expect_identical(match.dt$type, c(
"", "batch", "extern", "", ""))
expect_identical(match.dt$chrom, c(
"chr10", "chrNA", NA, NA, "chr1"))
expect_identical(match.dt$chromStart, c(
"213,054,000", "111,000", NA, NA, "110"))
expect_identical(match.dt$chromEnd, c(
"213,055,000", "222,000", NA, NA, "111"))
})
range.square <- list(
"[[]",
task1="[0-9]+", as.integer,
"(?:-",#begin optional end of range.
taskN="[0-9]+", as.integer,
")?", #end is optional.
"[]]")
test_engines("capture_first_df square brackets pattern", {
full.square <- list(
job="[0-9]+", as.integer,
"_",
"(?:",#begin alternate
task="[0-9]+", as.integer,
"|",#either one task(above) or range(below)
range.square,
")",#end alternate
"(?:[.]",
type=".*",
")?")
if(identical(getOption("nc.engine"), "ICU")){
expect_error({
capture_first_df(subject.df, JobID=full.square)
}, "when matching pattern above with ICU engine")
}else{
match.dt <- capture_first_df(subject.df, JobID=full.square)
expect_identical(names(match.dt), c(
"JobID", "subject",
"job", "task", "task1", "taskN", "type"))
expect_identical(match.dt$job, as.integer(c(
13937810, 13937810, 13937810, 14022192, 14022204)))
expect_identical(match.dt$task, as.integer(c(
25, 25, 25, NA, NA)))
expect_identical(match.dt$task1, as.integer(c(
NA, NA, NA, 1, 4)))
expect_identical(match.dt$taskN, as.integer(c(
NA, NA, NA, 3, NA)))
expect_identical(match.dt$type, c(
"", "batch", "extern", "", ""))
}
})
no.rownames <- data.frame(
JobID=c(
"13937810_25",
"13937810_25.batch",
"13937810_25.extern",
"14022192_[1-3]",
"14022204_[4]"),
position=c(
"chr10:213,054,000-213,055,000",
"chrNA:111,000-222,000",
"foo bar",
NA,
"chr1:110-111 chr2:220-222"),
stringsAsFactors=FALSE)
test_engines("capture_first_df returns data.frame with default rownames", {
match.df <- capture_first_df(
no.rownames,
JobID=list(
job="[0-9]+", as.integer,
"_",
"(?:",#begin alternate
task="[0-9]+", as.integer,
"|",#either one task(above) or range(below)
range.pattern,
")",#end alternate
"(?:[.]",
type=".*",
")?"),
position=list(
nomatch.error=FALSE,
chrom="chr.*?",
":",
chromStart=".*?",
"-",
chromEnd="[0-9,]*"))
expect_identical(names(match.df), c(
"JobID", "position",
"job", "task", "task1", "taskN", "type",
"chrom", "chromStart", "chromEnd"))
expect_identical(match.df$job, as.integer(c(
13937810, 13937810, 13937810, 14022192, 14022204)))
expect_identical(match.df$task, as.integer(c(
25, 25, 25, NA, NA)))
expect_identical(match.df$task1, as.integer(c(
NA, NA, NA, 1, 4)))
expect_identical(match.df$taskN, as.integer(c(
NA, NA, NA, 3, NA)))
expect_identical(match.df$type, c(
"", "batch", "extern", "", ""))
expect_identical(match.df$chrom, c(
"chr10", "chrNA", NA, NA, "chr1"))
expect_identical(match.df$chromStart, c(
"213,054,000", "111,000", NA, NA, "110"))
expect_identical(match.df$chromEnd, c(
"213,055,000", "222,000", NA, NA, "111"))
expect_identical(rownames(match.df), paste(1:5))
})
named.uniq.chr <- data.frame(
JobID=c(
foo="13937810_25",
bar="13937810_25.batch",
baz="13937810_25.extern",
sars="14022192_[1-3]",
last="14022204_[4]"),
position=c(
"chr10:213,054,000-213,055,000",
"chrNA:111,000-222,000",
"chr2:1-2",
"chr3:4-5",
"chr1:110-111 chr2:220-222"),
stringsAsFactors=FALSE)
keep.digits <- function(x)as.integer(gsub("[^0-9]", "", x))
test_engines("two name groups not OK with named subject", {
expect_error({
capture_first_df(
named.uniq.chr,
JobID=list(
name="[^.]+[.].|[0-9]+",
rest=".*"),
position=list(
name="chr.*?",
":",
chromStart=".*?", keep.digits,
"-",
chromEnd="[0-9,]*", keep.digits))
}, "must not conflict with existing column names")
})
test_engines("error for no pattern", {
expect_error({
capture_first_df(named.uniq.chr)
}, "no patterns specified in ...")
})
test_engines("error for un-named list", {
expect_error({
capture_first_df(named.uniq.chr, list())
}, "each pattern in ... must be named using a column name of subject")
})
test_engines("error for un-named list with name", {
expect_error({
capture_first_df(named.uniq.chr, list(foo="bar"))
}, "each pattern in ... must be named using a column name of subject")
})
test_engines("error for un-recognized name", {
expect_error({
capture_first_df(named.uniq.chr, foo="bar")
}, "each pattern in ... must be named using a column name of subject")
})
test_engines("error for non-df subject", {
expect_error({
capture_first_df(c("foo", "bar"), list(foo="bar"))
}, "subject must be a data.frame with character columns to match")
})
test_engines("error for non-df subject", {
expect_error({
capture_first_df(c("foo", "bar"), foo="bar")
}, "subject must be a data.frame with character columns to match")
})
test_engines("error for factor column", {
fac.df <- data.frame(foo="bar", stringsAsFactors=TRUE)
expect_error({
capture_first_df(fac.df, foo=list(baz="sars"))
}, "problem for subject column foo: Error in stop_for_subject(subject): subject has class=factor and length=1 but should be a character vector with length>0", fixed=TRUE)
})
test_engines("error for same column name twice", {
expect_error({
capture_first_df(
named.uniq.chr,
JobID=list(f="baz"),
JobID="foo")
}, "each argument / subject column name should be unique")
})
test_engines("error for named subject", {
expect_error({
capture_first_df(
JobID=named.uniq.chr,
JobID=list(f="[0-9]+"))
},
"first argument (subject data.frame) should not be named",
fixed=TRUE)
})
in.df <- data.frame(bar="foobar", stringsAsFactors=FALSE)
test_engines("df only one group = name", {
out.dt <- capture_first_df(
in.df,
bar=list(
name="foo"))
expect_identical(out.dt$name, "foo")
})
matching.subjects <- c(
"chr10:213,054,000-213,055,000",
"chrM:111,000",
"chr1:110-111 chr2:220-222") # two possible matches.
test_engines("df subject no error if nomatch.error=TRUE and all matches", {
subject.df <- data.frame(
subject.col=matching.subjects, stringsAsFactors=FALSE)
match.dt <- capture_first_df(
subject.df,
subject.col=list(
nomatch.error=TRUE,
chrom="chr.*?",
":",
chromStart="[0-9,]+", keep.digits,
list(
"-",
chromEnd="[0-9,]+", keep.digits
), "?"))
expect_identical(
match.dt$chromEnd,
as.integer(c(213055000, NA, 111)))
})
chr.pos.nomatch.vec <- c(
"chr10:213,054,000-213,055,000",
"chrM:111,000",
"this will not match",
NA, # neither will this.
"chr1:110-111 chr2:220-222") # two possible matches.
test_engines("df subject stop if nomatch.error=TRUE and no match", {
subject.df <- data.frame(chr.pos.nomatch.vec, stringsAsFactors=FALSE)
expect_error({
capture_first_df(
subject.df,
chr.pos.nomatch.vec=list(
nomatch.error=TRUE,
chrom="chr.*?",
":",
chromStart="[0-9,]+", keep.digits,
list(
"-",
chromEnd="[0-9,]+", keep.digits
), "?"))
}, "subject(s) 3,4 (2 total) did not match regex below", fixed=TRUE)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.