flagSpp <- function(Z, index){
kill.action <- function(Z, index, code){
if(code=="SKIP"){
# truly skip; leave all flags as-is
}
if(code=="no"){
# nada
Z[index, flag:="fine"]
}
if(code=="m3"){
Z[index & val.src=="m3",flag:="bad"]
Z[index & val.src!="m3" | is.na(val.src),flag:="ok"]
}
if(code!="no" & code!="m3" & code!="SKIP"){
num2kill <- which(seq_len(nrow(Z[index])) %in% as.integer(strsplit(code, split=" ")[[1]]))
Z[index & cumsum(index)%in%num2kill, flag:="bad"]
Z[index & !cumsum(index)%in%num2kill, flag:="ok"]
}
} # end kill.action
kill.badKey <- function(Z, index){
print(Z[index], nrow=Inf)
kill.code <- readline("There's a conflict in the data entry. Enter one of the following:\n [m3] flag all val.src==m3 as 'bad'\n [no] no problem, flag everything as 'fine'\n [0-9] Enter row #'s to flag as 'bad', with each # sep by a space\n [SKIP] Do nothing; leave flag as-is\n")
kill.action(Z, index, kill.code)
}
invisible(kill.badKey(Z, index))
}
# =========================================
# = Check for and Correct Inconsistencies =
# =========================================
checkConsistent <- function(Z, col2check=names(Z)[!names(Z)%in%c(by.col,not.consistent)], by.col="spp", not.consistent=c("ref","flag","val.src","tbl.row","mtch.src","tax.src","website","website2","tax.src2","conflict")){
replacementsMade <- 0
replacementsFailed <- 0
replacementUnneeded <- 0
taxProblemSolution <- data.table(spp=character(), prob.col=character(), solution=character())
tPS <- FALSE
for(i in unique(Z[,eval(s2c(by.col))][[1]])){
for(j in col2check){
t_ind <- Z[,eval(s2c(by.col))[[1]]==i & !is.na(eval(s2c(by.col))[[1]])]
t.out <- Z[t_ind,get(j)] # temporary value for a given `spp` and the j column
if(lu(t.out)>1){
if(lu(t.out[!is.na(t.out)])==1){ # if the 2+ values are just an NA and something else
# then just replace the NA with the something else
replacementsMade <- replacementsMade + 1L
Z[t_ind,c(j):=list(unique(t.out[!is.na(t.out)]))]
}else{ # otherwise, if there are more than 2 non-NA unique values
prob.spp <- i # prob. means "problem" / "problematic"
prob.col <- j
prob.opts <- una(t.out) #unique(t.out[!is.na(t.out)])
fix.taxProb <- function(){ # defining in function in loop for readability
po_opts <- paste0(1:length(prob.opts), ": ", prob.opts)
po_ind <- readline(paste(
"Pick your solution. SKIP to skip, otherwise pick the number you want:\n ",
paste0(po_opts, collapse="\n "), "\n"
))
prob.opts[as.integer(po_ind)]
}
if(tPS){ # FALSE; a prompt from when I had this in a script, was TRUE if reading in a file that had some answers
prob.fix.t <- taxProblemSolution[eval(s2c(by.col))[[1]]==i & prob.col==j,solution]
if(length(prob.fix.t)>0){
prob.fix <- prob.fix.t
}else{
print(j)
print(Z[eval(s2c(by.col))[[1]]==i])
prob.fix <- fix.taxProb()
taxProblemSolution <- rbind(taxProblemSolution, data.table(spp=i, prob.col=j, solution=prob.fix))
}
}else{
print(j)
print(Z[eval(s2c(by.col))[[1]]==i])
prob.fix <- fix.taxProb()
taxProblemSolution <- rbind(taxProblemSolution, data.table(spp=i, prob.col=j, solution=prob.fix))
}
if(prob.fix!="SKIP"){ # I've never tested the use of the SKIP response ...
Z[eval(s2c(by.col))[[1]]==i,c(j):=list(prob.fix)]
replacementsMade <- replacementsMade + 1L
}else{
replacementsFailed <- replacementsFailed + 1L
}
}
}else{
replacementUnneeded <- replacementUnneeded + 1L
}
}
}
}
# =================
# = check.and.set =
# =================
# Function to see if the corrected version of a bad spp name
# already exists in a data set; if it does,
# then the wrong version is overwritten with the content
check_and_set <- function(wrong, corrected, Z=spp.key){
check <- spp.key[spp!=wrong,corrected%in%spp]
if(check){
# if the corrected name already exists,
# make sure to have all the rows with the wrong name match up with the corrected rows,
# that way if we make any changes, both sets get updated
# For example, if a name XX is wrong, and Xx is the corrected name,
# say that we are going to set the trophic level of Xx to 42,
# but the current entry is 40. If we were to say 'change all rows
# with name XX to have a TL to 42, and also switch the bad XX name to the good Xx name',
# then we would have some rows with TL of 42 (the ones that originally had the bad name), and
# some rows with TL of 40 (the ones that originally had the corrected name).
# Thus, we have to get the names and other content to match before changing anything.
# Bottom line is that we need to be sure that all things are consistent, and that this requires
# more care when we are switching the 'spp' of an entry to a 'spp' that is already there.
noSet <- c("ref", "val.src", "tbl.row", "mtch.src", "website","website2","flag", "conflict", "tax.src2")
all.but.noSet <- names(Z)[!names(Z)%in%noSet]
stopifnot(
all(
sapply(
Z[spp==corrected,eval(s2c(all.but.noSet))],
function(x)length(unique(x[!is.na(x)]))<=1
)
)
)
# Replace NA's with non-NA's in the same column,
# where needed, and if possible
all.vals <- Z[spp==corrected & spp!=wrong,]
if(any(is.na(all.vals))){
set2nonNA(Z=Z, index=Z[,spp==corrected&spp!=wrong])
}
Z[spp==wrong, c(all.but.noSet):=unique(Z[spp==corrected,eval(s2c(all.but.noSet))])]
}else{
# if the corrected name doesn't already exist,
# then simply switch the wrong name to the corrected name,
Z[spp==wrong, spp:=corrected]
}
setkey(Z, ref, spp)
invisible(NULL)
}
# ===========
# = ref2spp =
# ===========
# Function that is useful when
# you see that you need to change the `spp` value for a certain `ref`,
# but the new `spp` value you are switching to is already in spp.key.
# In this case, you could enter everything to match what is already in spp.key,
# but this function just takes your row with ref==Ref,
# and sets its columns to the values already present in the spp==Spp rows.
# It also sets the spp column in the ref==Ref row to be Spp.
ref2spp <- function(Ref, Spp, Z=spp.key){
check <- Z[ref!=Ref,Spp%in%spp]
if(check){
noSet <- c("ref", "val.src", "tbl.row", "mtch.src", "website", "website2", "flag", "conflict", "tax.src2")
all.but.noSet <- names(Z)[!names(Z)%in%noSet]
stopifnot(
all(
sapply(
Z[spp==Spp & ref!=Ref,eval(s2c(all.but.noSet))],
function(x)length(unique(x[!is.na(x)]))<=1
)
)
)
# Replace NA's with non-NA's in the same column,
# where needed, and if possible
all.vals <- Z[spp==Spp & ref!=Ref,]
if(any(is.na(all.vals))){
set2nonNA(Z=Z, index=Z[,spp==Spp&ref!=Ref])
}
# prepare to insert the Spp values in the Ref rows
new.vals <- Z[spp==Spp & ref!=Ref,eval(s2c(all.but.noSet))]
setkey(new.vals, spp)
new.vals <- unique(new.vals, by=key(new.vals))
# Insert
Z[ref==Ref, c(all.but.noSet):=new.vals]
}else{
# if the corrected name doesn't already exist,
# then simply switch the wrong name to the corrected name,
Z[ref==Ref, spp:=Spp]
}
# Return
# setkey(Z, ref, spp)
invisible(NULL)
}
# =============
# = set2nonNA =
# =============
# When a column of a data.frame has 2 unique values,
# an NA, and a non-NA, this function will set all NA values to the non-NA value.
# Z is the name of a full data.table, and index is a vector of TRUE/FALSE
# indicating which rows of Z are supposed to be examined
# The use-case in mind is where Z is a full data.table,
# and index indicates a portion of the data.table that is supposed to be compared.
# It's done this way to avoid having to subset the data.table in advance,
# that way the replacements can easily be made "in place", thus not soaking up extra memory.
# Similarly, note that thus function changes the data.table passed to the Z argument ...
# there is no output, but the original data.table will be affected.
set2nonNA <- function(Z, index){
tn <- names(Z)
to.set <- sapply(Z[index], function(x)any(is.na(x)) & lu(x[!is.na(x)])==1)
c.to.set <- tn[to.set]
if(length(c.to.set)>=1){
for(i in 1:length(c.to.set)){
set.vals <- Z[index, unique(eval(s2c(c.to.set[i]))[[1]])]
set.val <- set.vals[!is.na(set.vals)]
stopifnot(class(set.val)==Z[index,class(eval(s2c(c.to.set[i]))[[1]])])
Z[index, c(c.to.set[i]):=list(set.val)]
}
}
}
# =========
# = check =
# =========
check <- function(X, check_ind, random=FALSE){
pr <- function(X, index, check_num=NULL, check_tot=NULL){
help_msg <- "
others --col1-col2: type others followed by column names to match to spp.key
change --col1-col2 val: type change followed by column names to change to val
clearFlag: changes most columns to NA, sets flag to bad
genusCheck: genus is correct, but taxLvl, species, spp and trophic information wrong; flag as check
splitSpp: copy spp into species, first word of spp into genus, set taxLvl to spp, flag as check
comNA: set common to NA, flag as check
r2s --spp: apply ref2spp function; after supplying spp name, check for it in rest of data set, and import values from other instances of spp
undo --n: undoes changes to past n lines (note: not past n *changes*, but *lines*!)
z: undoes all changes made to current line (undo --1)
g: google the 'ref' in chrome
f: FIND 'ref' using gnr_resolve; matches might be outdated
s: SYNONYMS of 'spp'; find most up-to-date
c: continue to next line
"
stopifnot(sum(index)==1)
if(is.null(check_num) | is.null(check_tot)){
cat("\n>>> Current line to check:\n")
}else{
current_msg <- paste("\n>>> Current line to check (item ", check_num, " of ", check_tot, ", ", round(check_num/check_tot, 3)*100, "%):\n", sep="")
cat(current_msg)
}
cat(print(X[index]))
get_piece <- function(rl1){
gsub("(?=[ --]).*", "", rl1, perl=TRUE)
}
rl1 <- readline("Declare action (type 'h' for help): ")
piece <- get_piece(rl1)
while(!piece%in%c("others","change","clearFlag","genusCheck","splitSpp","comNA","r2s","z","undo","g","f","s","c")){
if(piece=="h"){
cat(help_msg)
}else{
cat(">>> invalid action selected, type 'h' for help")
}
rl1 <- readline("Declare action (type 'h' for help): ")
piece <- get_piece(rl1)
}
# while(rl1=="h"){
# cat(help_msg)
# rl1 <- readline("Declare action (type 'h' for help): ")
# }
piece <- get_piece(rl1)
stopifnot(piece%in%c("others","change","clearFlag","genusCheck","splitSpp","comNA","r2s","z","undo","g","f","s","c"))
return(list(rl1=rl1, piece=piece))
}
others <- function(X, index, rl1, retInd=FALSE){
cols <- c(simplify2array(strsplit(gsub("others --", "", rl1), "-")))
X_val <- X[index, eval(s2c(cols))]
dt_in <- function(d1, d2, cols){
m <- mapply("%in%", d1[,eval(s2c(cols))], d2[,eval(s2c(cols))])
rowSums(m)==ncol(m)
}
X_ind <- dt_in(X, X[index], cols=cols)
if(sum(X_ind)==0){
cat(">>> no matches found")
}else{
cat(print(X[X_ind]))
}
if(retInd){
return(X_ind)
}else{
invisible(NULL)
}
}
change <- function(X, index, rl1){
col_char <- gsub("(change --)|( [ a-zA-Z0-9]*$)", "", rl1)
val <- gsub("(change --[a-zA-Z0-9-]*[a-zA-Z] )", "", rl1)
cols <- c(simplify2array(strsplit(col_char, "-")))
if(val=="NA"){
val <- NA
}else{
val <- as(val, unique(sapply(X[,eval(s2c(cols))], class)))
}
X[index, c(cols):=val]
cat("\n>>> changes made\n")
flush.console()
invisible(NULL)
}
clearFlag <- function(X, index){
rl_flag <- "change --flag bad"
rl_clear <- "change --spp-common-taxLvl-species-genus-family-order-class-superclass-subphylum-phylum-kingdom-Picture-trophicOrig-trophicDiet NA"
rl_clear_tl <- "change --trophicLevel-trophicLevel.se NA"
rlf <- change(X, index, rl_flag)
rlc <- change(X, index, rl_clear)
rlc_tl <- change(X, index, rl_clear_tl)
invisible(NULL)
}
genusCheck <- function(X, index){
rl_flag <- "change --flag check"
rl_clear <- "change --spp-common-taxLvl-species-Picture-trophicOrig-trophicDiet NA"
rl_clear_tl <- "change --trophicLevel-trophicLevel.se NA"
rlf <- change(X, index, rl_flag)
rlc <- change(X, index, rl_clear)
rlc_tl <- change(X, index, rl_clear_tl)
X[index, spp:=genus]
X[index, taxLvl:="genus"]
invisible(NULL)
}
splitSpp <- function(X, index){
spp_name <- X[index, spp]
genus_name <- gsub("^([A-Z][[:alnum:]]+).*$", "\\1", spp_name)
X[index, c("taxLvl", "species", "genus", "flag"):=list("species",spp_name,genus_name,"check")]
invisible(NULL)
}
comNA <- function(X, index){
X[index, c("common","flag"):=list(NA, "check")]
invisible(NULL)
}
r2s <- function(X, index, rl1){
spp_name <- gsub("r2s --", "", rl1)
Ref <- X[index, ref]
ref2spp(Ref=Ref, spp_name, Z=X)
invisible(NULL)
}
undo <- function(X, hist, rl1, r){
count_r <- function(hist){sum(sapply(hist, function(x)!is.null(x$index)))}
undo_n <- function(rl1){as.integer(gsub("(undo --)", "", rl1))}
if(missing(r)){
r <- count_r(hist)
}
if(rl1=="z"){
n <- 1L
}else{
n <- undo_n(rl1)
}
undo_ind <- r:(r-n+1)
olds <- rbindlist(lapply(hist[undo_ind], function(x)X[x$index]))
for(h in undo_ind){
# X[hist[[h]]$index] <- hist[[h]]$X
cn <- names(X)
X[hist[[h]]$index, c(cn):=hist[[h]]$X]
}
news <- rbindlist(lapply(hist[undo_ind], function(x)X[x$index]))
# cat(print(olds))
cat("\n>>> reverted to\n")
cat(print(news))
# cat(print(X[hist[[r]]$index]))
flush.console()
invisible(NULL)
}
f <- function(X, index){
print(X[index, gnr_resolve(una(ref))])
invisible(NULL)
}
s <- function(X, index){
tsn <- get_tsn(X[index,una(spp)], ask=FALSE, verbose=FALSE)
acc <- itis_acceptname(tsn)
print(acc)
invisible(NULL)
}
hist <- list(list(index=NULL, X=NULL))
check_vec <- 1:sum(check_ind)
check_tot <- length(check_vec)
if(random){
check_vec <- sample(check_vec, check_tot, replace=FALSE)
}
check_num <- 0
for(r in check_vec){
check_num <- check_num + 1
t_ind <- cumsum(check_ind)==r & check_ind
out <- NULL
hist[[check_num]] <- list(index=t_ind, X=X[t_ind])
while(is.null(out) || (!is.null(out) & out!="c")){
t_pr <- pr(X, t_ind, check_num=check_num, check_tot=check_tot)
out <- switch(t_pr$piece,
change=change(X, t_ind, t_pr$rl1),
others=others(X, t_ind, t_pr$rl1),
clearFlag=clearFlag(X, t_ind),
genusCheck=genusCheck(X, t_ind),
splitSpp=splitSpp(X, t_ind),
comNA=comNA(X, t_ind),
r2s=r2s(X, t_ind, t_pr$rl1),
undo=undo(X, hist, rl1=t_pr$rl1, r=check_num),
z=undo(X, hist, rl1="z", r=check_num),
g=system("bash -l", input=paste0("google ", "'",cull(X[t_ind,ref])," itis'")),
f=f(X, t_ind),
s=s(X, t_ind),
c="c"
)
}
}
invisible(NULL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.