#' Check count data for errors
#'
#' Pulls out the records of a count-data export that need a human to look at
#' them: letno mismatches, uncountable scans, records with notes, letnos
#' that were not counted exactly three times, and letnos with suspiciously
#' small or large counts.
#'
#' @param df data.frame of count data. This dataframe will be the csv file
#'   downloaded from the Ech. Project website. The function reads the columns
#'   seed_count_assignment_id, image_file_link, letno, experiment, batch,
#'   assigned.user, corrected_letno, letno_matches_yn, can_count_yn, notes
#'   and count.
#' @param viewAllMismatches logical, default is FALSE. When FALSE, mismatch
#'   records whose corrected_letno is the same as letno (ignoring case,
#'   whitespace and hyphens in corrected_letno and hyphens in letno) are
#'   dropped. Records with a missing corrected_letno are always kept, since
#'   they cannot be shown to agree. When TRUE every record not marked "y" in
#'   letno_matches_yn is returned.
#' @return output, a list containing 5 objects:
#'   mismatches (counter noted that the envelope letNo in the scan did not
#'   match the computer letNo),
#'   cantCount (counter noted that the scan could not be counted),
#'   checkNotes (records where counter made a note. Check through notes!),
#'   missingCounts (letNos that were not counted 3 times, with the number of
#'   records found for each),
#'   checkZeros (all records of every letNo that has at least one count
#'   below 3 or above 900, sorted by letNo).
manageCountData <- function(df, viewAllMismatches = FALSE) {
  mismatchCols <- c("seed_count_assignment_id", "image_file_link", "letno",
                    "experiment", "batch", "assigned.user", "corrected_letno")
  mismatches <- df[!df$letno_matches_yn %in% "y", mismatchCols]
  if (!viewAllMismatches) {
    # normalise both letnos before comparing them
    cln <- toupper(mismatches$corrected_letno)
    cln <- gsub("\\s|\\-", "", cln)
    ln <- gsub("\\-", "", mismatches$letno)
    # An NA comparison (missing corrected letno) must not be used as a row
    # index: that would yield an all-NA row instead of the real record.
    differs <- cln != ln
    mismatches <- mismatches[is.na(differs) | differs, ]
  }

  cantCount <- df[!df$can_count_yn %in% "y",
                  c("seed_count_assignment_id", "image_file_link", "letno",
                    "batch", "assigned.user")]

  checkNotes <- df[!df$notes %in% c("", NA),
                   c("seed_count_assignment_id", "letno", "experiment",
                     "batch", "assigned.user", "count", "notes")]

  # number of records per letno; anything other than 3 needs checking
  zz <- as.data.frame(table(as.character(df$letno)))
  colnames(zz) <- c("letno", "counts")
  checkLetnos <- zz[!zz$counts %in% 3, ]

  # which() drops NA counts so they cannot inject an NA letno into the lookup
  zeros <- unique(df$letno[which(df$count < 3 | df$count > 900)])
  checkZeros <- df[df$letno %in% zeros,
                   c("count", "seed_count_assignment_id", "letno",
                     "experiment", "batch", "assigned.user")]
  checkZeros <- checkZeros[order(checkZeros$letno), ]

  list(mismatches = mismatches,
       cantCount = cantCount,
       checkNotes = checkNotes,
       missingCounts = checkLetnos,
       checkZeros = checkZeros)
}
# manageCountData(exp1996_2012)
#' Summarize the batches in a harvest datasheet (hh.year)
#'
#' For every value of the batch column, reports the smallest and largest
#' value of the No column and the number of records in the batch. Uses base
#' R only.
#'
#' @param hh harvest data frame (e.g. hh.2014). Must contain the columns
#'   batch and No.
#' @return summary, a data frame with one row per batch and the columns
#'   batch, startNo (minimum No), endNo (maximum No) and batchSize (number
#'   of records), sorted by startNo. A zero-row input gives a zero-row
#'   result with the same columns.
batchSummary <- function(hh) {
  if (nrow(hh) == 0L) {
    return(data.frame(batch = hh$batch,
                      startNo = hh[, "No"],
                      endNo = hh[, "No"],
                      batchSize = integer(0),
                      stringsAsFactors = FALSE))
  }

  # distinct batches in sorted order; match() (unlike split/aggregate) keeps
  # an NA batch as a group of its own
  batches <- unique(hh$batch)
  batches <- batches[order(batches)]
  group <- match(hh$batch, batches)
  letNos <- hh[, "No"]

  # tapply orders its result by group index, i.e. in the order of `batches`
  batchTable <- data.frame(
    batch = batches,
    startNo = as.vector(tapply(letNos, group, min)),
    endNo = as.vector(tapply(letNos, group, max)),
    batchSize = as.vector(tapply(letNos, group, length)),
    stringsAsFactors = FALSE
  )
  batchTable[order(batchTable$startNo), ]
}
#' create assignment csv to upload
#'
#' createCSV automates the creation of an upload-ready csv for the Echinacea
#' Project's online counting database.
#' This function now works for achene counts and x-rays.
#' The function is set up to create 3 records for each letno
#' (3 rounds of counting). The function randomizes the order of records,
#' no need for pre-upload randomization.
#'
#' The batch column of the result is derived from letno: a two-letter prefix
#' followed by a hyphen is removed and the last three digits are replaced by
#' "000". The experiment column is copied from scansdf$batch.
#'
#' @param scansdf is a data frame of scan filenames and letnos, scans (from
#'   function loadScans) by default. The columns filename, letno and batch
#'   are used.
#' @param harvYear is the year heads were harvested, this needs to be entered
#' @param priority is the counting priority of the scans, 50 by default
#' @param type the type of upload csv to make, imageTool has filepaths
#'   starting with "C:/", online has file paths starting with the online
#'   achene counter, and xray has file paths with the online x-ray counter
#' @return a character string telling the user where the result is. The
#'   upload-ready data.frame itself is NOT returned: as a side effect it is
#'   assigned to the variable out with the superassignment operator (so it
#'   normally ends up in the global workspace). To save it, type
#'   write.csv(out, "C:/assignmentYear.csv", row.names = FALSE).
createCSV <- function(scansdf = scans, harvYear, priority = 50,
                      type = c("imageTool", "online", "xray")) {
  type <- match.arg(type)

  scanDir <- paste0("cg", harvYear, "scans/")
  filePath <- switch(
    type,
    imageTool = paste0("C:/", scanDir),
    online = paste0("http://echinaceaproject.org/count/acheneCounter/",
                    "?img=http://echinaceaproject.org/count/scanImages/",
                    scanDir),
    xray = paste0("http://echinaceaproject.org/count/xrayCounter/",
                  "?img=http://echinaceaproject.org/count/xrayImages/",
                  scanDir)
  )

  # [A-Za-z], not [A-z]: the latter also matches [ \ ] ^ _ and backtick
  batchName <- gsub("[A-Za-z][A-Za-z]-", "", scansdf$letno)
  batchName <- gsub("[0-9][0-9][0-9]$", "000", batchName)

  nScans <- length(scansdf$letno)
  nRecords <- 3 * nScans
  # paste0() returns one string when every vector argument is empty;
  # trimming to nScans keeps a zero-row scansdf from producing a bogus link
  links <- paste0(filePath, scansdf$batch, "/", scansdf$filename)
  links <- links[seq_len(nScans)]
  imageType <- if (type == "xray") "xray" else "achene"

  out <- data.frame(image_type = rep_len(imageType, nRecords),
                    image_file_link = rep(links, 3),
                    letno = rep(scansdf$letno, 3),
                    batch = rep(batchName, 3),
                    experiment = rep(scansdf$batch, 3),
                    harvest_year = rep_len(harvYear, nRecords),
                    priority = rep_len(priority, nRecords),
                    round = rep(c(1, 2, 3), each = nScans),
                    user = rep_len("", nRecords))

  # Randomize rows. The superassignment is deliberate and kept for backward
  # compatibility: callers read `out` from the workspace.
  out <<- out[sample.int(nrow(out)), ]
  "out, an upload-ready data frame is in your workspace. Save using write.csv()"
}
#' Check scans then create and write an upload csv
#'
#' This will create and write a csv with the counting assignments as it
#' should be ready to upload. Missing scans only trigger a warning; the csv
#' is created and written either way.
#'
#' The function works through the workspace: it reads the objects
#' \code{scans} and \code{hh.<year>} by name and both reads and assigns
#' \code{out} (see \code{\link{createCSV}}).
#'
#' @param scansFolder the folder in which the scans can be found
#' @param writeTo the file to which the upload csv will be written
#' @param year the harvest year of the scans
#' @param exprio a data frame containing two columns: (1) the experiments
#' to upload and (2) the priorities for the experiments. The first column
#' should have the experiments and the second column should have priorities
#' @param oneCt a data frame whose first column is experiments that should
#' only be counted once and whose second column is the username of the person
#' to count that experiment
#' @param type the type of upload csv to make see \code{\link{createCSV}}
#' @return the results of check.batch
writeUploadCSV <- function(scansFolder, writeTo, year, exprio, type, oneCt = NULL) {
  # load in files and remove unnecessary ones
  # NOTE(review): loadScans is defined elsewhere; presumably it assigns
  # `scans` in the workspace, since its return value is not used here
  loadScans(path = scansFolder)
  scans <- scans[!(scans$filename %in% c("Thumbs.db", "itfiles.ini")), ]
  hh <- get(paste0("hh.", year))
  checked <- check.batch(batch = exprio[, 1], scansdf = scans, harvestFile = hh)
  if (checked$missingCount > 0) {
    # warning() pastes its arguments with no separator, so the spaces must
    # be part of the strings
    warning("Missing ", checked$missingCount,
            " scans. See returned value for details.")
  }

  # create assignment csv (createCSV leaves its result in `out`)
  createCSV(scansdf = scans, harvYear = year, priority = 50, type = type)

  # only take ones in the experiments we want to upload
  out <- out[out$experiment %in% exprio[, 1], ]

  # change priorities to what you want
  for (i in seq_len(nrow(exprio))) {
    out[out$experiment %in% exprio[i, 1], "priority"] <- exprio[i, 2]
  }

  if (!is.null(oneCt)) {
    # get only one record per head for these experiments: once every round
    # is set to 1 the three records per scan are identical and collapse
    out[out$experiment %in% oneCt[, 1], "round"] <- 1
    out <- out[!duplicated(out), ]
    # change this to decide who counts
    for (i in seq_len(nrow(oneCt))) {
      out[out$experiment %in% oneCt[i, 1], "user"] <- oneCt[i, 2]
    }
  }

  # publish the final table to the workspace as well
  out <<- out

  # write the file to a csv
  write.csv(out, writeTo, row.names = FALSE)
  checked
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.