R/readces.R

Defines functions readces

Documented in readces

# check for duplicate rows on a day - is this a merge problem
readces <-
function(file=NULL, visits='std', group.race=TRUE, fix=FALSE, winter=FALSE, verbose=FALSE){
  
  # create a blank vector for collecting dodgy records, set here so we know it exists
  report.cols <- c("error", "RowNo", "ring", "species", "sitename", "age_in", "sex_in", "day", "month", "year")
  warning.flag <- 0 # if any warnings arise (probable) suggest setting verbose to TRUE

  if( is.null(file) )
    file <- file.choose()
  
  if( !file.exists(file) )
    stop(paste("cannot find the file:", file, "- please check typing and that are you in the right directory"))

  # get column names and work out how many
  coln <- strsplit(readLines(file, n=1), '[,;]')[[1]]
  
  # first check whether this is already an output of readces()
  final.names <- c("countryID", "sitename", "site", "lat", "long", "habitat",
                   "netlength", "visit", "julian", "day", "month", "year",
                   "StartTime", "EndTime", "scheme", "ring", "species", "age",
                   "sex", "race", "wing", "weight", "p3", "brood", "moult", 
                   "fat", "weighTime")
  if( sum(coln%in%final.names) == length(coln) ){
    # names match, so hopefully already in the right format
    message("Reading in a formatted CES data file")
    result <- suppressWarnings(data.table::fread(file))
    result <- as.data.frame(result)
    class(result) <- c('ces', 'data', 'data.frame')
    return(result)
  } 
  
  # short form names
  var.names <- c('countryID', 'siteID', 'coords', 'habitat', 'visit',
                 'day', 'month', 'year', 'netlength', 'StartTime', 'EndTime',
                 'scheme', 'ring', 'species', 'sex', 'age', 'brood',
                 'moult', 'wing', 'weight', 'weighTime', 'p3', 'fat', 'lat', 'long')
  # longer form ones
  alt.names <- c('Country_Identifier', 'Site_Identifier', 'sitename', 'coordinates', 
                 'site_coordinates', 'visit_period', 'visit_start_time', 'visit_end_time',
                 'total_net_length', 'ring_scheme', 'ring_number', 'brood_patch_score', 
                 'wing_length', 'mass', 'body_mass', 'time_of_weighing', 'length_p3', 
                 'fat_score', 'moult_state', 'habitat_type', 'start', 'end',
                 'latitude', 'longitude', 'lon', 'visitno', 'cessite')
  # map back to the main list
  column_nos <- c(1:25, 1, 2, 2, 3, 3, 5, 10, 11, 9, 12, 13, 17, 19, 20, 20, 21,
                  22, 23, 18, 4, 10, 11, 24, 25, 25, 5, 2)
  
  match.names <- adist(tolower(coln), tolower(c(var.names, alt.names)))
  which.names <- unlist(apply(match.names, 1, which.min))
  col.numbers <- column_nos[which.names]
  col.names <- var.names[col.numbers]

  result <- suppressWarnings(data.table::fread(file))
  n.read <- nrow(result)
  # use suppressWarnings to avoid messages about bumping col classes late in the data
  data.table::setnames(result, col.names)

  # duplicates indicate something is amiss
  duplicated.cols <- which(duplicated(col.names))
  if( any(duplicated.cols) == TRUE ){ 
    wmessage <- paste('Unrecognised columns', paste(coln[duplicated.cols], collapse=', '), 'removed')
    warning(wmessage, call.=FALSE, immediate.=TRUE)
    result <- subset(result, select=col.names[-duplicated.cols])
  }

  result[ , RowNo := row.names(.SD)]
  result[ , error := NA]
  setcolorder(result, "RowNo")
  
  ## Ring Number ----
  # combine scheme and ring number to ensure unique identifiers
  result[ , ring := paste0(result$scheme, "_", gsub('[.]','',result$ring))]

  # check for duplicated rings on different species (yes, really!)
  dup_spp <- unique(result[ , c('species','ring')], by=c('species', 'ring')) 
  dup_ndx <- duplicated(dup_spp, by=c('ring'))
  n.duplicated <- sum(dup_ndx)
  if( n.duplicated > 0 ){
    if( verbose )
      result$error[result$ring %in% dup_spp$ring[dup_ndx]] <- "Mult Spp:"
    wmessage <- paste(n.duplicated, 'ring numbers are associated with more than one species code')
    warning(wmessage, call.=FALSE, immediate.=TRUE)
    warning.flag <- 1
  }
  
  ## Species ----
  # Check for unknown species
  unknown_spp <- nrow(result[species==0 | species==99999, ])
  if( unknown_spp > 0 ){
    result <- result[!(species==0 | species==99999), ]
    wmessage <- paste(unknown_spp, 'records with no species ("0", "99999") have been removed')
    warning(wmessage, call.=FALSE, immediate.=TRUE)
  }
  
  # remove races if required
  result[ , race := species]  # just so we know it is there
  if( group.race ){
    result[ , species := (10 * floor(as.numeric(as.character(species))/10))]  # concatenate races, original code now in race
    wmessage <- paste("Subspecies have been grouped in the 'species' column, use the 'race' column,",
                      "or group.race=FALSE if this is not desired")
    message(wmessage)
  }
  
  # check species are valid Euring codes
  int_spp <- suppressWarnings(as.integer(as.character(result$species)))  # force them to be integer
  char_spp <- unique(as.character(result$species[is.na(int_spp)])) # select the rejected records
  if( length(char_spp) > 0 ){
    if( verbose )
      result$error[result$species %in% char_spp] <- "Spp Code:"
    wmessage <- paste('Non-numeric species codes:', paste(char_spp,collapse=', '), 'will be deleted')
    warning(wmessage, call.=FALSE, immediate.=TRUE)
    warning.flag <- 1
    result <- result[!is.na(int_spp), ]
  }
  # now check if they are likely CES species
  dodgy <- unique(int_spp[!(int_spp %in% cesnames$spp) & !is.na(int_spp)])
  if( length(dodgy) > 0 ){
    wmessage <- paste('Unrecognised species codes encountered:', paste(dodgy,collapse=', '), ', consider checking them')
    message(wmessage)
  }
  result[ , species := as.factor(species)]
  
  ## Age ----
  # check that ages are all 3/4 for simplicity later on...
  result[ , age_in := age] # keep a copy of the original for error reports
  if( length(result$age[!result$age %in% c(2, 3, 4)]) > 0 ){
    result[ , age1 := as.character(age)]
    result[ , age := NULL] # sometimes it is character, so start from scratch
    setkey(result, 'age1')
    ages <- sort(unique(result$age1))
    adult_s <- c('4','5','6','7','8','9','A','B','C','D','E','F','G','H','J','K','L','M','P','N','Q','R','S','T')
    adult_w <- c('6','7','8','9','A','B','C','D','E','F','G','H','J','K','L','M','P','N','Q','R','S','T')
    if( winter ){
      if( month > 6){
        result[age1 %in% adult_s, age := 4] 
        result[age1 %in% c('3','3J'), age := 3] 
        result[age1 == 2, age := 2] 
      } else {
        result[age1 %in% adult_w, age := 4] 
        result[age1 %in% c('3','5'), age := 3] 
        result[age1 == 4, age := 2] 
      }
    } else {
      result[age1 %in% adult_s, age := 4] 
      result[age1 %in% c('3','3J'), age := 3] 
      result[age1 == 2, age := 2] 
    }
    result <- result[ age %in% c(2, 3, 4) ] # anything else we just delete, note should keep 2s 
    result[ , age1 := NULL]
    ages <- ages[!ages %in% c('2','3','3J','4','5','6')] # check for non 3/4 age codes
    if( length(ages) > 0 ){
      wmessage <- paste('Age-codes:', paste(ages, collapse=','), 'have been recoded as 4 or deleted')
      message(wmessage)
    }
  }
  result[ , age := as.integer(age)]
  # check for juveniles not in first year of ringing
  dodgy <- merge(result[ , min(year), by=ring], result[age==3, max(year), by=ring], by='ring')
  names(dodgy) <- c('ring', 'min', 'minj')
  result <- merge(result, dodgy, by='ring', all.x=TRUE)
  dodgy <- dodgy[minj > min, ]
  if( nrow(dodgy) > 0 ){
    rings <- unique(dodgy$ring)
    if(verbose )
      result$error[result$ring %in% rings] <- "Age corr:"
    if( fix ){
      result[(age==3 & year>min), age := 4]
      wmessage <- paste(nrow(dodgy), "age records of impossible 3's corrected")
      warning(wmessage, call.=FALSE, immediate.=TRUE)
    } else {
      wmessage <- paste(nrow(dodgy), 'individuals aged 3 after first year of ringing')
      warning(wmessage, call.=FALSE, immediate.=TRUE)
    }
    warning.flag <- 1
  }
  result[ , ':=' (min=NULL, minj=NULL)] # no longer needed
  
  ## Sex ----
  result[ , sex_in := sex] # keep a copy of the original for error reports
  result[sex %in% c('1','3','5','8','m','M'), sex := 'M'] 
  result[sex %in% c('2','4','6','9','f','F'), sex := 'F'] 
  result[sex %in% c('0','7','-','U'), sex := "-"] # just for the record 
  result[!(sex %in% c('F', 'M')), sex:= "-"] # pick up any other entries
  
  sexes <- setDT(result)[sex%in%c('F','M'), .N, by=.(sex,ring)][order(-N), .(sex_t=sex[1L]), keyby=ring]
  result <- merge(result, sexes, by='ring', all.x=TRUE)
  nch1 <- sum(result$sex != result$sex_t, na.rm = TRUE)
  nch2 <- sum(result$sex[result$sex%in%c("F","M")] != result$sex_t[result$sex%in%c("F","M")], na.rm = TRUE)
  if( nch2 > 0 )
    warning.flag <- 1
  if( nch2 > 0 & verbose ){
    dodgy <- unique(result[sex!=sex_t & sex_t!="-" & sex!="-", ][ , ring])
    # this construction saves having to use result$ on every variable - neat!
    wmessage <- paste(nch2, 'records changed sex')
    warning(wmessage, call.=FALSE, immediate.=TRUE)
    if( verbose )
      result$error[result$ring %in% dodgy] <- "Sex corr:"
  }
  if( nch1 > 0 & fix ){
    # fill in M/F (according to which is most commonly recorded)
    wmessage <- paste(nch1, 'sexes fixed, of which', nch2, 'records changed sex')
    warning(wmessage, call.=FALSE, immediate.=TRUE)
    result$sex[!is.na(result$sex_t)] <- result$sex_t[!is.na(result$sex_t)]
  }
  result[ , sex := as.factor(sex)]
  result[ , sex_t := NULL]    
  
  # Site Names ----
  if( length(grep("[#]", as.character(result$siteID))) > 0 ){ # yes, really - thanks to Arizaga
    wmessage <- "Using the '#' sign in sitenames confuses Mark, rename your sites!"
    warning(wmessage, call.=FALSE, immediate.=TRUE)
  }
  result[ , sitename := as.factor(siteID)] 
  result[ , site := as.numeric(as.factor(siteID))]
  result[ , siteID := NULL] # no longer needed
  
  ## Visits ----
  result[ , visit := as.character(visit)]
  na.visit <- sum(is.na(result$visit))
  if( na.visit > 0 ){
    wmessage <- paste(na.visit, "records without a visit number deleted")
    warning(wmessage, call.=FALSE, immediate.=TRUE)
    result <- result[!is.na(visit)]
  }
  if( length(visits) == 1 & visits == 'std' ){
    stdv <- c ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13')
    n.extra <- length(result$visit[!result$visit%in%stdv])
    if( n.extra > 0 ){
      wmessage <- paste(n.extra, ifelse(n.extra==1, "encounter", "encounters"), "on non-standard visits removed")
      message(wmessage)
    }   
    result <- result[visit %in% stdv]
    result[ , visit := as.integer(visit)]
  } else {
    if( length(visits) > 1 )
      result <- result[visit %in% visits, ]
    if( any(is.na(suppressWarnings(as.integer(result$visit)))) ){
      wmessage <- "visit will be treated as character"
      message(wmessage)
    } else 
      result[ , visit := as.integer(visit)]
  }
  
  ## Dates ----
  nmissd <- nmissm <- nmissy <- 0 # just so the if lower down doesn't fail
  if( !is.integer(result$day) ){
    suppressWarnings(result[ , day := as.integer(day) ])
    nmissd <- is.na(result$day)
  }
  if( !is.integer(result$month) ){
    suppressWarnings(result[ , month := as.integer(month) ])
    nmissm <- is.na(result$month)
  }
  if( !is.integer(result$year) ){
    suppressWarnings(result[ , year := as.integer(year) ])
    nmissy <- is.na(result$year)
  }
  count <- sum((nmissd + nmissm + nmissy) > 0)
  if( count > 0 ){
    wmessage <- paste('Non-numeric dates detected in', count, 'records')
    warning(wmessage, call.=FALSE, immediate.=TRUE)
  }
  invalid.days <- nrow(result[day < 1 | day > 31])
  if( invalid.days > 0 ){
    if( verbose )
      result$error[result$day < 1 | result$day > 31] <- "Day val :"
    wmessage <- paste(invalid.days, 'day values outside the range 1-31 detected')
    warning(wmessage, call.=FALSE, immediate.=TRUE)
    warning.flag <- 1
  }
  
  # are dates in the (typical) CES period?
  if( !winter ){
    non.summer <- nrow(result[month < 4 | month > 9])
    if( non.summer > 0 ){
      if( verbose )
        result$error[result$month < 4 | result$month > 9] <- "Mon val :"
      wmessage <- paste(non.summer, ifelse(non.summer==1,'record','records'), 'outside the period April to September, is this expected?')
      warning(wmessage, call.=FALSE, immediate.=TRUE)
      warning.flag <- 1
    }
  }
  
  # now create a Julian day column for doing phenology things
  # create jday first to avoid an error about using $ with atomic vectors
  jday <- function(d, m, y){ strptime(paste0(m, '/', d, '/', y), format="%m/%d/%Y")$yday }
  result[ , julian := jday(day, month, year)]
  # and reset days if a winter CES
  if( winter ){ # set start dates to be July and change year Julian date accordingly
    if( month < 7 ){
      result[ , year := year-1]
      result[ , julian := julian + 184]
    }  
    else
      result[ , julian := julian - 181]
    # are dates in the (typical) winter CES period?
    non.winter <- nrow(result[month > 3 & month < 10])
    if( non.summer > 0 ){
      if( verbose )
        result$error[result$month > 3 & result$month < 10] <- "Mon val :"
      wmessage <- paste(non.winter, ifelse(non.winter==1,'record','records'), 'outside the period October to March, is this expected?')
      warning(wmessage, call.=FALSE, immediate.=TRUE)
      warning.flag <- 1
    }
  }

  ## Coordinates ----
  if( any(colnames(result)=="coords") ){

    if( !is.character(result$coords) ){
      wmessage <- paste0('Coordinates not in Euring format "+ddmmss', quote("\uB1"), 'dddmmss"')
      warning(wmessage, call.=FALSE, immediate.=TRUE)
    }
    coords <- result$coords
    # first check to see whether they are the right length (14, 15 characters)
    llfmt <- mean(nchar(coords), na.rm=TRUE) 
    if( !llfmt %in% c(14, 15) ){
      err <- which(!nchar(coords) %in% c(14, 15))
      if( length(err) == 0 )
        wmessage <- "Check coordinates field - a mix of lengths detected?"
      else
        wmessage <- paste("Unrecognised coordinate format in sites:", paste(unique(result$sitename[err]), collapse=', '))
      warning(wmessage, call.=FALSE, immediate.=TRUE)
      llfmt <- round(llfmt, 0)
    }
    
    # first check for decimal degrees
    if( any(as.numeric(substr(coords,4,4)) > 5) ){ # i.e. there are minutes > 50
      wmessage <- 'Reading in coordinates as decimal degrees'
      message(wmessage)

      result [ , lat := as.integer(substr(coords,1,7))/10000]
      result [ , long := as.integer(substr(coords,8,20))/10000]

    # no? then in Euring ddmmss format
    } else {
      
      # get the latitudes
      c1 <- suppressWarnings(as.integer(substr(coords,1,3))) # avoid cryptic warning about coerced NAs
      c2 <- suppressWarnings(as.integer(substr(coords,4,5))/60)
      c3 <- suppressWarnings(as.integer(substr(coords,6,7))/3600)
      if( anyNA(c(c1, c2, c3)) ){
        wmessage <- "missing values generated for latitude, are all the coordinates 15 characters long?"
        warning(wmessage, call.=FALSE, immediate.=TRUE)
      }
      if( max(c2, c3, na.rm=TRUE) > 60 ){
        wmessage <- "values greater than 60 detected in latitude minutes/seconds, check coordinate format"
        warning(wmessage, call.=FALSE, immediate.=TRUE)
      }
      result[ , lat := c1 + c2 + c3]
      
      # now the longitudes
      ew <- ifelse(substr(coords,8,8) == '-', -1 , 1) # hemisphere
      if( llfmt == 15 ){   # Euring code specifies 15 chars, but just in case long deg is 2 digits rather than 3
        if( anyNA(as.integer(substr(coords,9,11))) | 
            anyNA(as.integer(substr(coords,12,13))) | 
            anyNA(as.integer(substr(coords,14,15))) ){
          wmessage <- "missing values generated in longitude, check for stray non-numeric characters"        
          warning(wmessage, call.=FALSE, immediate.=TRUE)
        }
        c1 <- suppressWarnings(as.integer(substr(coords,9,11)))
        c2 <- suppressWarnings(as.integer(substr(coords,12,13)))/60
        c3 <- suppressWarnings(as.integer(substr(coords,14,15)))/3600
        if( max(c2, c3, na.rm=TRUE) > 59 ){
          wmessage <- "values greater than 59 detected in longtitude minutes/seconds, check coordinate format"
          warning(wmessage, call.=FALSE, immediate.=TRUE)
        }
        result [ , long := ew * (c1 + c2 + c3)]
      } else if( llfmt == 14 ){
        if( anyNA(as.integer(substr(coords,9,10))) | 
            anyNA(as.integer(substr(coords,11,12))) | 
            anyNA(as.integer(substr(coords,13,14))) ){
          wmessage <- "missing values generated in longitude, check for possible stray non-numeric characters" 
          warning(wmessage, call.=FALSE, immediate.=TRUE)
        }
        c1 <- suppressWarnings(as.integer(substr(coords,9,10)))
        c2 <- suppressWarnings(as.integer(substr(coords,11,12))/60)
        c3 <- suppressWarnings(as.integer(substr(coords,13,14))/3600)
        if( max(c2, c3, na.rm=TRUE) > 59 ){
          wmessage <- "values greater than 59 detected in longitude minutes/seconds, check coordinate format"
          warning(wmessage, call.=FALSE, immediate.=TRUE)
        }
        result [ , long := ew * (c1 + c2 + c3)]
      } else {
        result [ , long := NA]
      }   
    } # end of the Euring format block

    result[ , coords := NULL ] # tidy-up
  } # end of reading in the coordinates 
  
  # now do a final check and tidy
  if( sum(colnames(result) %in% c("lat","long")) != 2 ) {
    wmessage <- paste('Coordinates not recognised, check you have either a coords or lat & long columns')
    warning(wmessage, call.=FALSE, immediate.=TRUE)
  } else {
    # do the rounding first to minimise effects of very small differences
    roundc <- function(x) floor(1000*x)/1000 # so consistently bottom-left
    result[ , c('lat','long') := lapply(.SD,roundc), .SDcols=c('lat','long')]

    # check that sites have only one set of coordinates
    check.sites <- table(unique(result[ , c('sitename', 'lat', 'long')])$sitename)
    if( length(table(check.sites)) > 1 ){
      dodgy <- dimnames(check.sites)[[1]][check.sites > 1]
      wmessage <- paste(length(dodgy), "site(s) have multiple coordinates")
      message(wmessage)
      coordacc <- function(x) max(abs(max(x)-min(x)))
      result[ , c('lata','lona') := lapply(.SD,coordacc), .SDcols=c('lat','long'), by=list(sitename)]
      dodgy <- result[!duplicated(result$sitename), c('sitename','lata','lona')]
      dodgy <- dodgy$sitename[dodgy$lata > 0.1 | dodgy$lona > 0.1]
      wmessage <- paste("sites:", paste(dodgy, collapse=", "), "have coordinates >10km apart")
      warning(wmessage, call.=FALSE, immediate.=TRUE)
      result[ , ':=' (lata=NULL, lona=NULL) ]
      
      if( fix ){ # averages to give one coordinate per site 
        result[ , c('lat','long') := lapply(.SD,mean), .SDcols=c('lat','long'), by=list(sitename)]
        result[ , c('lat','long') := lapply(.SD,roundc), .SDcols=c('lat','long')]
      }
    }
  }
  
  ## Net Length ----
  if( !is.integer(result$netlength) ){
    netl <- unique(result$netlength)
    suppressWarnings(result[ , netlength := as.integer(netlength) ])
    nmiss <- sum(is.na(result$netlength))
    if( nmiss > 0 ){
      netl <- netl[is.na(suppressWarnings(as.integer(netl)))]
      wmessage <- paste0('non-numeric net lengths (', paste(sprintf("'%s'", netl), collapse=","), ') detected in ', nmiss, ' records')
      warning(wmessage, call.=FALSE, immediate.=TRUE)
    }
  }
  nzero <- result[netlength == 0, .N]
  if( nzero > 0 ){
    result[netlength == 0, netlength := NA]
    wmessage <- paste('net length of zero detected in', nzero, ifelse(nzero==1,'record','records'), '; these set to NA')
    message(wmessage)
  }
  count.lengths <- result[ , .(count=uniqueN(netlength)), by=sitename]
  if( nrow(count.lengths[count>1]) > 0 ){
    wmessage <- paste('multiple net lengths detected sites:', 
                      paste(count.lengths$sitename[count.lengths$count>1], collapse=','))
    warning(wmessage, call.=FALSE, immediate.=TRUE)
  }
  
  ## Habitat ----
  result[ , habitat := toupper(habitat)]
  result[habitat %in% c('RD','RE'), habitat := 'RB']

  dodgy <- unique(result$habitat[!(result$habitat %in% c('DS','FA','GN','RB','WD','WS'))])
  if( length(dodgy) > 0 ){
    wmessage <- paste("unrecognised habitat codes:", paste(dodgy, collapse=', '))
    warning(wmessage, call.=FALSE, immediate.=TRUE)
  }
  result[ , habitat := as.factor(habitat)]

  ## Biometrics ----
  # rudimentary biometric checks for now
  # check for commas rather than decimal points
  if( is.character(result$wing) )
    result$wing <- suppressWarnings(as.numeric(gsub(",", ".", result$wing, fixed=TRUE)))
  if( is.numeric(result$wing) ){
    result[wing == 0, wing := NA]
    too.long <- unique(result$species[result$wing > 150 & !is.na(result$wing)])
    if( length(too.long) > 0 ){
      wmessage <- paste("long (>150mm) wing lengths recorded for species codes:",
                        paste0(too.long[order(too.long)], collapse=", "), "are you sure?")
      warning(wmessage, call.=FALSE, immediate.=TRUE)
    }
    too.short <- unique(result$species[result$wing < 45 & !is.na(result$wing)])
    if( length(too.short) > 0 ){
      wmessage <- paste("short (<45mm) wing lengths recorded for species codes:",
                        paste0(too.short[order(too.short)], collapse=", "), "are you sure?")
      warning(wmessage, call.=FALSE, immediate.=TRUE)
    }  
  } 
  if( is.character(result$weight) )
    result[ , weight := suppressWarnings(as.numeric(gsub(",", ".", weight, fixed=TRUE)))]
  if( is.numeric(result$weight) ){
    result[weight == 0, weight := NA]
    too.long <- unique(result$species[result$weight > 150 & !is.na(result$weight)])
    if( length(too.long) > 0 ){
      wmessage <- paste("heavy (>150g) weights recorded for species codes:",
                        paste0(too.long[order(too.long)], collapse=", "), "are you sure?")
      warning(wmessage, call.=FALSE, immediate.=TRUE)
    }
    too.short <- unique(result$species[result$wing < 5 & !is.na(result$wing)])
    if( length(too.short) > 0 ){
      wmessage <- paste("light (<5g) weights recorded for species codes:",
                        paste0(too.short[order(too.short)], collapse=", "), "are you sure?")
      warning(wmessage, call.=FALSE, immediate.=TRUE)
    }  
  }
  if( is.character(result$p3) )
    result$p3 <- suppressWarnings(as.numeric(gsub(",", ".", result$p3, fixed=TRUE)))

  ## Prepare Result ----
  # set country attribute
  country <- unique(result$countryID)
  if( length(country) > 1 )
    country <- substr(country[1], 1, 2)
  attr(result,'country') <- country
    
  if( warning.flag & verbose ){
    errfile <- paste0(getwd(), '/', country, '_err.csv')
    wmessage <- paste("Please review records in", sprintf("'%s'", errfile), "for errors")
    warning(wmessage, call.=FALSE, immediate.=TRUE)
    report.data <- result[!is.na(error), ..report.cols]
    setorder(report.data, ring, year, month, day)
    write.csv(report.data, file=errfile, row.names=FALSE, quote=FALSE)
  } else if( warning.flag & !verbose ){
    wmessage <- "Warnings were raised, consider using verbose=TRUE to review these records"
    message(wmessage)
  }
  
  n.out <- nrow(result)
  wmessage <- paste(n.read, "rows were read in,", n.out, "rows were retained")
  message(wmessage)
  
  # tidy up and order nicely
  result[ , ':=' (scheme=NULL, age_in=NULL, sex_in=NULL, RowNo=NULL, error=NULL) ] 
  
  col.order <- c("countryID", "sitename", "site", "lat", "long", "habitat", "netlength",
                 "visit", "julian", "day", "month", "year", "StartTime", "EndTime", 
                 "scheme", "ring", "species", "age", "sex", "race",
                 "wing", "weight", "p3", "brood", "moult", "fat", "weighTime")
  data.table::setcolorder(result, col.order[which(col.order %in% names(result))])
  if( fix )
    data.table::setorder(result, sitename, year, visit, species, ring)
  
  result <- as.data.frame(result)
  class(result) <- c('ces', 'data', 'data.frame')
  return(result)

}
btorobrob/cesr documentation built on June 9, 2025, 5:39 a.m.