In dmwarn/CruiseCheckeR: Provide Tools to Evaluate Data Quality

# Report title: the cruise identifiers (year, lake, vessel, cruise, target),
# each preceded by its label and joined with hyphens.
thename <- paste0(
  "Year-", year,
  "-Lake-", lake,
  "-Vessel-", vessel,
  "-Cruise-", cruise,
  "-Target-", target
)

# Date on which the report is generated.
dat <- Sys.Date()

# Element 7 of Sys.info() (the `user` entry) is used as the report author.
info <- Sys.info()
auth <- info[7]

title: r thename date: r dat author: r auth

This code will provide you with an overview of the characteristics of trawl data tables. For csv data, it will only work if your files are called op, tr_op, tr_catch, tr_l, tr_lf, tr_fish. It will fail with any other file names. It is just that simple. Fix the file names, or edit the code to accept a wider range of file names that can be coerced to these, and I will gladly add that to the package/function.

The code will behave differently depending on whether or not the input is "csv". The first step in examining csv files is to see if they have the required fields. Such fields represent those that can't be null in the RVCAT database. If required fields are absent, or they are not properly named, the user will be prompted to remedy this with a message stating which fields are not correct. This step is only necessary for csv files.

OP Variables

library(janitor)
library(tidyverse)
library(CruiseCheckeR)

data("op_names")

# Normalise the OP column names to ALL_CAPS so they can be compared with the
# required field names below.
op <- op %>% clean_names(case = "all_caps")
obs_names <- names(op)

# Required fields that can't be NULL. 'OP_ID' is the exception for csv
# inputs and is not checked here.
required <- c(
  "YEAR", "VESSEL", "SERIAL", "SAMPLE_TYPE", "LAKE", "PORT",
  "CRUISE", "OP_DATE", "TIME"
)
missed <- setdiff(required, obs_names)

# Stop with ONE message listing every missing field. Pasting the vector
# `missed` straight into the message produced one copy of the sentence per
# field, which stop() then glued together without a separator.
if (length(missed) > 0) {
  stop(
    "The Op data are missing the required field(s): ",
    paste(missed, collapse = ", "),
    call. = FALSE
  )
}

Table of OP Variables

# Convert the dfSmry() output for the OP table to a data frame and render it
# with kable.
df <- as.data.frame(
  CruiseCheckeR::dfSmry(op)
)
knitr::kable(df)

Plots of OP Variables

# Canned plots for the OP table; the text-size and layout arguments are
# passed straight to dfPlot().
dfPlot(
  op,
  mcex = 1.1,
  cex = 0.8,
  mfrow = c(2, 2)
)

Map of OP Locations

library(leaflet)
library(leaflet.extras)
library(htmltools)
library(htmlwidgets)
library(mapedit)
library(mapview)

# Columns holding the start and end coordinates of each operation. They are
# not among the required OP fields, so their presence is checked before
# the map is drawn.
begfields <- c("BEG_LATITUDE_DD", "BEG_LONGITUDE_DD")
endfields <- c("END_LATITUDE_DD", "END_LONGITUDE_DD")

# Map the start location of every operation, with a popup describing it.
# all() collapses the per-field test to the single TRUE/FALSE that `if`
# needs; without the guard a missing coordinate column fails inside leaflet
# with an error that does not name the problem.
if (all(begfields %in% names(op))) {
  leaflet(op) %>%
    addTiles() %>%
    addCircleMarkers(
      lng = ~BEG_LONGITUDE_DD, lat = ~BEG_LATITUDE_DD,
      opacity = 0.1, radius = 4,
      popup = paste("Serial:", op$SERIAL, "<br>",
                    "Beg_Depth:", op$BEG_DEPTH, "<br>",
                    "Op_Date:", op$OP_DATE, "<br>",
                    "Vessel:", op$VESSEL, "<br>",
                    "Start Latitude:", op$BEG_LATITUDE_DD, "<br>",
                    "Start Longitude:", op$BEG_LONGITUDE_DD)
    )
} else {
  # print() is used so the note appears in the report like the other
  # "table does not exist" notes in this file.
  print(paste0(
    "The Op data are missing the location field(s) ",
    paste(setdiff(begfields, names(op)), collapse = ", "),
    ", so the map of start locations was not drawn."
  ))
}

# Disabled: the same map for the end locations.
#if (all(endfields %in% names(op)))
#leaflet(op) %>%
#  addTiles() %>%
#  addCircleMarkers(lng = ~END_LONGITUDE_DD, lat = ~END_LATITUDE_DD, opacity=0.1, radius =4,
#             popup = paste("Serial:", op$SERIAL, "<br>",
#                           "Beg_Depth:", op$BEG_DEPTH, "<br>",
#                           "Op_Date:", op$OP_DATE, "<br>",
#                           "Vessel:", op$VESSEL, "<br>",
#                           "End Latitude:", op$END_LATITUDE_DD, "<br>",
#                           "End Longitude:", op$END_LONGITUDE_DD))

TR_OP Variables

data("tr_op_names")

# Normalise the TR_OP column names to ALL_CAPS so they can be compared with
# the required field names below.
tr_op <- tr_op %>% clean_names(case = "all_caps")
obs_names <- names(tr_op)

# Required fields that can't be NULL. csv inputs are identified by
# YEAR/VESSEL/SERIAL; any other source must carry 'OP_ID' instead.
if (dat.source == "csv") {
  required <- c(
    "YEAR", "VESSEL", "SERIAL",
    "TOW_TIME", "SPEED", "SPEED_UNIT",
    "TYPE_SET", "MESH_SIZE", "TR_DESIGN"
  )
} else {
  required <- c(
    "OP_ID",
    "TOW_TIME", "SPEED", "SPEED_UNIT",
    "TYPE_SET", "MESH_SIZE", "TR_DESIGN"
  )
}
missed <- setdiff(required, obs_names)

# Stop with ONE message listing every missing field. Pasting the vector
# `missed` straight into the message produced one copy of the sentence per
# field, which stop() then glued together without a separator.
if (length(missed) > 0) {
  stop(
    "The tr_op data are missing the required field(s): ",
    paste(missed, collapse = ", "),
    call. = FALSE
  )
}

Table of TR_OP Variables

# Convert the dfSmry() output for the TR_OP table to a data frame and render
# it with kable.
df <- as.data.frame(
  CruiseCheckeR::dfSmry(tr_op)
)
knitr::kable(df)

Plots of TR_OP Variables

# Canned plots for the TR_OP table; the text-size and layout arguments are
# passed straight to dfPlot().
dfPlot(
  tr_op,
  mcex = 1.1,
  cex = 0.8,
  mfrow = c(2, 2)
)

# Separate histogram of the SPEED column.
hist(tr_op$SPEED)

TR_CATCH Variables

data("tr_catch_names")

# Normalise the TR_CATCH column names to ALL_CAPS so they can be compared
# with the required field names below.
tr_catch <- tr_catch %>% clean_names(case = "all_caps")
obs_names <- names(tr_catch)

# Required fields that can't be NULL. csv inputs are identified by
# YEAR/VESSEL/SERIAL; any other source must carry 'OP_ID' instead.
if (dat.source == "csv") {
  required <- c("YEAR", "VESSEL", "SERIAL", "SPECIES", "N")
} else {
  required <- c("OP_ID", "SPECIES", "N")
}
missed <- setdiff(required, obs_names)

# Stop with ONE message listing every missing field. Pasting the vector
# `missed` straight into the message produced one copy of the sentence per
# field, which stop() then glued together without a separator.
if (length(missed) > 0) {
  stop(
    "The tr_catch data are missing the required field(s): ",
    paste(missed, collapse = ", "),
    call. = FALSE
  )
}

# Weight divided by count for each catch row. Computed only after the check
# above, so that a missing N is reported by that check rather than failing
# on this assignment.
# NOTE(review): WEIGHT is not in the required list; if it is absent this
# line still fails -- confirm whether WEIGHT should be required.
tr_catch$avg_wt <- tr_catch$WEIGHT / tr_catch$N

Table of TR_CATCH Variables

# Convert the dfSmry() output for the TR_CATCH table to a data frame and
# render it with kable.
df <- as.data.frame(
  CruiseCheckeR::dfSmry(tr_catch)
)
knitr::kable(df)

Plots of TR_CATCH Variables

library(plotly)
# Canned plots for every TR_CATCH variable; the text-size and layout
# arguments are passed straight to dfPlot().
dfPlot(
  tr_catch,
  mcex = 1.1,
  cex = 0.8,
  mfrow = c(2, 2)
)

Additional Plots of TR_CATCH Variables

# Custom plot for spotting errors in catch weight: weight per individual
# (avg_wt, rounded to one decimal) for each species, on a log10 y axis.

# A very large scipen penalty makes R prefer fixed over scientific notation.
options(scipen = 1000)

# Species code as a factor, used for the x axis.
tr_catch$Species <- factor(tr_catch$SPECIES)

gg <- ggplot(tr_catch, aes(x = Species, y = round(avg_wt, 1))) +
  geom_point() +
  scale_y_log10() +
  theme_bw() +
  labs(
    title = "Plot of Catch Weight/Catch N by Species",
    y = "Weight/N (g)"
  )

# Convert the static plot to an interactive plotly widget.
ggplotly(gg)

Table of TR_L Variables

# The TR_L table is only summarised when the data source is not csv.
if (dat.source == "csv") {
  print("The data source is csv, so the TR_L table does not exist!")
} else {
  df <- as.data.frame(
    CruiseCheckeR::dfSmry(tr_l)
  )
  knitr::kable(df)
}

Plots of TR_L Variables

# Canned plots for every TR_L variable, drawn only when the data source is
# not csv.
if (dat.source == "csv") {
  print("The data source is csv, so the TR_L table does not exist!")
} else {
  dfPlot(
    tr_l,
    mcex = 1.1,
    cex = 0.8,
    mfrow = c(2, 2)
  )
}

Additional Plots of TR_L Variables

# Custom length-frequency histograms (bin width 10), one panel per species,
# drawn only when the data source is not csv.
# NOTE(review): the panels facet on `Species`, but in this file that column
# is only created on tr_catch -- confirm that tr_l really has a `Species`
# column (the TR_FISH plots facet on `SPECIES`).
if (dat.source == "csv") {
  print("The data source is csv, so the TR_L table does not exist!")
} else {
  # A very large scipen penalty makes R prefer fixed over scientific notation.
  options(scipen = 1000)

  gg <- ggplot(tr_l, aes(LENGTH)) +
    geom_histogram(binwidth = 10) +
    facet_wrap(~Species, ncol = 2, scales = "free") +
    theme_bw() +
    labs(title = "Plot of Length Frequency by Species")

  # Convert the static plot to an interactive plotly widget.
  ggplotly(gg)
}

Table of TR_FISH Variables

data("tr_fish_names")

# Normalise the TR_FISH column names to ALL_CAPS so they can be compared
# with the required field names below.
tr_fish <- tr_fish %>% clean_names(case = "all_caps")
obs_names <- names(tr_fish)

# Required fields that can't be NULL. csv inputs are identified by
# YEAR/VESSEL/SERIAL; any other source must carry 'TR_FISH_ID' and 'OP_ID'.
if (dat.source == "csv") {
  required <- c("YEAR", "VESSEL", "SERIAL", "SPECIES", "LENGTH")
} else {
  required <- c("TR_FISH_ID", "OP_ID", "SPECIES", "LENGTH")
}
missed <- setdiff(required, obs_names)

# Stop with ONE message listing every missing field. Pasting the vector
# `missed` straight into the message produced one copy of the sentence per
# field, which stop() then glued together without a separator.
if (length(missed) > 0) {
  stop(
    "The tr_fish data are missing the required field(s): ",
    paste(missed, collapse = ", "),
    call. = FALSE
  )
}

# Convert the dfSmry() output for the TR_FISH table to a data frame and
# render it with kable.
df <- as.data.frame(CruiseCheckeR::dfSmry(tr_fish))
knitr::kable(df)

Plots of TR_FISH Variables

# Canned plots for every TR_FISH variable; the text-size and layout
# arguments are passed straight to dfPlot().
dfPlot(
  tr_fish,
  mcex = 1.1,
  cex = 0.8,
  mfrow = c(2, 2)
)

Additional Plots of TR_FISH Variables

library(plotly)
# Custom plots for spotting errors in individual fish lengths and weights,
# one panel per species.

# A very large scipen penalty makes R prefer fixed over scientific notation.
options(scipen = 1000)

# Length distribution by species (bin width 10).
gg <- ggplot(tr_fish, aes(LENGTH)) +
  geom_histogram(binwidth = 10) +
  facet_wrap(~SPECIES, scales = "free", ncol = 2) +
  theme_bw() +
  ggtitle("Plot of Length Distribution by Species")

# The plot was previously only assigned to `gg` and never displayed; show it
# as an interactive widget, as the other `gg` plots in this file are.
ggplotly(gg)

# Weight distribution by species (50 bins).
# NOTE(review): WEIGHT is not among the required tr_fish fields; this plot
# and the next fail if the column is absent -- confirm whether it should be
# required.
ggplot(tr_fish, aes(WEIGHT)) +
  geom_histogram(bins = 50) +
  facet_wrap(~SPECIES, scales = "free", ncol = 2) +
  theme_bw() +
  ggtitle("Plot of Weight Distribution by Species")

# Length against weight (rounded to one decimal) by species.
ggplot() +
  geom_point(data = tr_fish, aes(x = LENGTH, y = round(WEIGHT, 1))) +
  facet_wrap(~SPECIES, scales = "free", ncol = 2) +
  theme_bw() +
  ggtitle("Plot of Length Versus Weight by Species")

dmwarn/CruiseCheckeR documentation built on Oct. 11, 2020, 9:56 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

dmwarn/CruiseCheckeR
Provide Tools to Evaluate Data Quality

In dmwarn/CruiseCheckeR: Provide Tools to Evaluate Data Quality

OP Variables

Table of OP Variables

Plots of OP Variables

Map of OP Locations

TR_OP Variables

Table of TR_OP Variables

Plots of TR_OP Variables

TR_CATCH Variables

Table of TR_CATCH Variables

Plots of TR_CATCH Variables

Additional Plots of TR_CATCH Variables

Table of TR_L Variables

Plots of TR_L Variables

Plots of TR_L Variables

Table of TR_FISH Variables

Plots of TR_FISH Variables

Additional Plots of TR_FISH Variables

R Package Documentation

Browse R Packages

We want your feedback!

dmwarn/CruiseCheckeR Provide Tools to Evaluate Data Quality

In dmwarn/CruiseCheckeR: Provide Tools to Evaluate Data Quality

OP Variables

Table of OP Variables

Plots of OP Variables

Map of OP Locations

TR_OP Variables

Table of TR_OP Variables

Plots of TR_OP Variables

TR_CATCH Variables

Table of TR_CATCH Variables

Plots of TR_CATCH Variables

Additional Plots of TR_CATCH Variables

Table of TR_L Variables

Plots of TR_L Variables

Plots of TR_L Variables

Table of TR_FISH Variables

Plots of TR_FISH Variables

Additional Plots of TR_FISH Variables

R Package Documentation

Browse R Packages

We want your feedback!

dmwarn/CruiseCheckeR
Provide Tools to Evaluate Data Quality