# build/scs.cat.1988-2002.R

library(gulf.data)

# Survey year to process: it is used below to locate the raw catch file
# and to name the output file ("scs.cat.<year>.csv").
year <- 2002 # Survey year

# Function to find multiple keywords at a time.
#
# NOTE: the name `rep` is kept so existing calls in this script keep working,
# but be aware that it masks base::rep() in the environment where the script
# is run.
#
# Arguments:
#   x   : character vector of regular expressions (keywords).
#   y   : character vector to be searched.
#   and : if TRUE (default), return the indices of the elements of 'y' that
#         match ALL patterns in 'x'; if FALSE, those matching ANY pattern.
#
# Value: integer vector of indices into 'y'. With an empty 'x', every index is
# returned when and = TRUE and none when and = FALSE.
rep <- function(x, y, and = TRUE){
   if (and){
      ix <- seq_along(y)   # Start from all indices, then narrow down.
      fun <- intersect
   }else{
      ix <- integer(0)     # Start from no indices, then accumulate.
      fun <- union
   }
   # seq_along() avoids the 1:0 trap when 'x' is empty:
   for (i in seq_along(x)) ix <- fun(ix, grep(x[i], y))
   return(ix)
}

# Load raw data:
# Locate and read the catch file for the survey year.
files <- locate(file = c(year, "cat", "csv"))
x <- read.csv(files, header = TRUE)

# Lower-case the field names and drop the species name translation fields:
names(x) <- tolower(names(x))
x <- x[setdiff(names(x), c("english", "latin", "french"))]

# Standardize field names (pattern -> replacement, applied in this order):
field.map <- c("townumber"    = "tow.number",
               "towid"        = "tow.id",
               "numbercaught" = "number.caught",
               "weightcaught" = "weight.caught",
               "species[.]1"  = "species.logbook",
               "species_bb"   = "species.logbook")
for (k in seq_along(field.map)){
   names(x) <- gsub(names(field.map)[k], field.map[[k]], names(x))
}

# Remove odd characters from the comments:
x$comment <- unaccent(x$comment)

# Logbook species names: remove odd characters, convert to lowercase, then
# restore the capital in "American":
x$species.logbook <- gsub("american", "American", tolower(unaccent(x$species.logbook)))

# Create date field (stored as a character string):
x$date <- as.character(date(x))

# Move to comments:
# Logbook entries containing "blackbook" or "logbook" are transferred to the
# comment field (replacing an empty comment, or appended to an existing one
# without a separator) and the logbook species name is then blanked.
for (keyword in c("blackbook", "logbook")){
   hits   <- grep(keyword, x$species.logbook)
   blank  <- intersect(hits, which(x$comment == ""))
   filled <- intersect(hits, which(x$comment != ""))

   x$comment[blank]  <- x$species.logbook[blank]
   x$species.logbook[blank] <- ""

   x$comment[filled] <- paste0(x$comment[filled], x$species.logbook[filled])
   x$species.logbook[filled] <- ""
}

# Remove pluralizations:
# These two substitutions strip an "s" at the end of the string and an "s"
# before every space, i.e. from every word, not only from plurals. Several of
# the corrections below restore or expect the resulting s-less forms
# (e.g. "hya[s]*" -> "hyas", "pandalu$" -> "pandalus").
x$species.logbook <- gsub("s$", "", x$species.logbook)
x$species.logbook <- gsub("s ", " ", x$species.logbook)

# Spelling mistakes:
# Statements are applied in sequence, so their order matters. Three forms are used:
#   - x[x == "..."] <- "..."   : replaces entries that match exactly;
#   - gsub(pattern, ...)       : replaces the matching part of an entry;
#   - x[grep(pattern, x)] <- . : replaces the WHOLE entry when the pattern occurs anywhere in it.
x$species.logbook[x$species.logbook == "anglefin"]  <- "aiglefin"  
x$species.logbook <- gsub("a[l]+igator[e]*", "alligator", x$species.logbook)  
x$species.logbook <- gsub("a[n]+elid[e]*", "annelid", x$species.logbook) 
x$species.logbook <- gsub("artica", "arctica", x$species.logbook) 
x$species.logbook <- gsub("ar[e]*a[n]*e[n]*u[s]*", "araneus", x$species.logbook)   
x$species.logbook <- gsub("areanu[s]*", "araneus", x$species.logbook)   

x$species.logbook <- gsub("bar[l]*b[o]*u", "barbu", x$species.logbook) 

x$species.logbook <- gsub("blenni[e]*", "blenny", x$species.logbook) 
x$species.logbook <- gsub("bigorneaux", "bigorneau", x$species.logbook)  
x$species.logbook <- gsub("ca[n]+adie[n]+e", "canadienne", x$species.logbook) 
x$species.logbook <- gsub("caod", "cod", x$species.logbook) 
x$species.logbook <- gsub("cock[l]*e[r]*", "cockle", x$species.logbook) 
# NOTE(review): "[m]" only matches spellings starting with "comco..."; confirm
# whether "[nm]" was intended.
x$species.logbook[grep("co[m]co[nm]bre", x$species.logbook) ] <- "concombre"
x$species.logbook <- gsub("coarcta[ct]u$", "coarctatus", x$species.logbook) 

x$species.logbook <- gsub("centiped$", "centipede", x$species.logbook) 
x$species.logbook <- gsub("dollar[ds]*", "dollar", x$species.logbook)
x$species.logbook <- gsub("etoi[l]+e", "etoile", x$species.logbook)  
x$species.logbook <- gsub("flound$", "flounder", x$species.logbook) 
x$species.logbook[x$species.logbook == "hahe"] <- "hake"
x$species.logbook <- gsub("hya[s]*", "hyas", x$species.logbook) 
x$species.logbook <- gsub("hy$", "hyas", x$species.logbook) 
x$species.logbook <- gsub("h[ae][r]+[ie]ng", "herring", x$species.logbook) 
x$species.logbook <- gsub("haddeck", "haddock", x$species.logbook) 
x$species.logbook <- gsub("h[eo]lot[h]*urie", "holothurie", x$species.logbook) 
x$species.logbook <- gsub("hermit[e]*[ ]+crab", "hermit crab", x$species.logbook)  
# rep() here is the keyword finder defined at the top of this script, not
# base::rep(): entries containing both "hermit" and "crab" are replaced entirely.
x$species.logbook[rep(c("hermit", "crab"), x$species.logbook)] <- "hermit crab"  

x$species.logbook <- gsub("gelatinou ", "gelatinous ", x$species.logbook)  
x$species.logbook[grep("laminaire", x$species.logbook) ] <- "laminaria"
x$species.logbook <- gsub("l[a]mp[eo]nie", "lompenie", x$species.logbook) 

x$species.logbook <- gsub("morue thoma ", "morue thomas", x$species.logbook)
x$species.logbook <- gsub("morue thoma$", "morue thomas", x$species.logbook)
x$species.logbook <- gsub("nerei[s]*[ ]+viren[s]*", "nereis virens", x$species.logbook) 

x$species.logbook[x$species.logbook %in% c("oursin mou", "ousin", "oursin etoile")]   <- "oursin"

x$species.logbook[x$species.logbook == "pep"]   <- "pepin"
x$species.logbook[deblank(x$species.logbook) == "pou"]   <- "puce de mer"
x$species.logbook <- gsub("pa[l]+[ou]+rde", "palourde", x$species.logbook)  
# NOTE(review): "." is a regex wildcard here, so "potato" plus ANY following
# character (including a space) is replaced; confirm this is intended.
x$species.logbook <- gsub("potato.", "potato", x$species.logbook) 
x$species.logbook <- gsub("pandalu$", "pandalus", x$species.logbook)  
x$species.logbook <- gsub("p[ao]lype", "polyp", x$species.logbook)
x$species.logbook <- gsub("^blie", "plie", x$species.logbook)

x$species.logbook <- gsub("skete egg", "skate egg", x$species.logbook)
x$species.logbook <- gsub("serpen ", "serpent ", x$species.logbook)
x$species.logbook <- gsub("souri ", "souris ", x$species.logbook)

# Truncated entries, completed when the entry matches exactly:
x$species.logbook[x$species.logbook == "crev"]  <- "crevette"
x$species.logbook[x$species.logbook %in% c("mor", "moru")]  <- "morue"
x$species.logbook[x$species.logbook == "barb"]  <- "barbu"
x$species.logbook[x$species.logbook == "ophiu"] <- "ophiure"  
x$species.logbook[x$species.logbook == "squi"]  <- "squid"  
x$species.logbook[x$species.logbook == "pillot"]  <- "morue pilote"  
 
x$species.logbook[x$species.logbook %in% c("our", "sea urchin type b", "sea urch")]  <- "sea urchin"  

x$species.logbook[x$species.logbook == "pito"] <- "piteau"
x$species.logbook <- gsub("je[l]+y[ ]+fish", "jellyfish", x$species.logbook)
x$species.logbook <- gsub("cohaugs", "quahog", x$species.logbook)  
x$species.logbook <- gsub("quohag", "quahog", x$species.logbook)   
x$species.logbook <- gsub("sea[ ]rav[iae]*n", "sea raven", x$species.logbook) 
 
x$species.logbook <- gsub("s[ao]l[ao]ster[e]*", "solaster", x$species.logbook) 
x$species.logbook <- gsub("spinny", "spiny", x$species.logbook) 

x$species.logbook <- gsub("sea mouce", "sea mouse", x$species.logbook)  
x$species.logbook <- gsub(" tigle", " trigle", x$species.logbook) 
x$species.logbook <- gsub("te[t]+ard", "tetard", x$species.logbook)
x$species.logbook <- gsub("3 spined", "three-spined", x$species.logbook) 
# "turbo.*" replaces everything from "turbo" to the end of the entry:
x$species.logbook <- gsub("turbo.*", "turbot", x$species.logbook) 
x$species.logbook <- gsub("yellow tail", "yellowtail", x$species.logbook) 
x$species.logbook <- gsub("welk ", "whelk ", x$species.logbook) 
x$species.logbook[x$species.logbook == "irish mos"] <- "irish moss" 


# Save species logbook names:
# The spelling-corrected names are kept in 'species.name.logbook' before
# 'species.logbook' is further collapsed into standardized names below.
x$species.name.logbook <- x$species.logbook

# Keyword substitutions:
# Each statement replaces the WHOLE entry when the stated keywords are found
# in it. intersect(grep(a), grep(b)) selects entries containing both a and b;
# rep(c(a, b), ...) is this script's keyword finder (all keywords by default,
# any keyword when and = FALSE). Statements are order-dependent.
x$species.logbook[intersect(grep("palourde", x$species.logbook), grep("mer", x$species.logbook))]   <- "palourde"
x$species.logbook[x$species.logbook == "eponges(video)"]                                            <- "eponge"
x$species.logbook[intersect(grep("etoile", x$species.logbook), grep("brittle", x$species.logbook))] <- "ophiure"
x$species.logbook[intersect(grep("etoile", x$species.logbook), grep("violet", x$species.logbook))]  <- "solaster"
x$species.logbook[intersect(grep("chaboisse", x$species.logbook), grep("18", x$species.logbook))]   <- "longhorn sculpin"
x$species.logbook[intersect(grep("tetard", x$species.logbook), grep("sculpin", x$species.logbook))] <- "sculpin"
x$species.logbook[intersect(grep("skate", x$species.logbook), grep("thorny", x$species.logbook))]   <- "thorny skate"
x$species.logbook[intersect(grep("basket", x$species.logbook), grep("star", x$species.logbook))]    <- "basketstar"
# Entries with qualifiers (any one of the listed keywords) are reduced to the generic name:
x$species.logbook[intersect(rep("[eé]toile", x$species.logbook), rep(c("grosse", "mer", "branche", "filament", "fine", "carr", "brain", "patte", "non", "speci", "tentacule"), x$species.logbook, and = FALSE))] <- "etoile"
x$species.logbook[intersect(rep("[eé]ponge", x$species.logbook), rep(c("feuille", "puant", "grosse", "branche", "pue", "filament", "fine", "carr", "brain", "patte", "non", "speci", "tentacule"), x$species.logbook, and = FALSE))] <- "eponge"
x$species.logbook[intersect(rep("mollusque", x$species.logbook), rep(c("toute", "sorte"), x$species.logbook, and = FALSE))] <- "mollusque"
x$species.logbook[intersect(grep("moule", x$species.logbook), grep("coque", x$species.logbook))] <- "bivalve"

x$species.logbook[intersect(grep("coque", x$species.logbook), grep("vide", x$species.logbook))]   <- "empty shells"
x$species.logbook[intersect(grep("arctica", x$species.logbook), grep("vide", x$species.logbook))] <- "empty shells"
x$species.logbook[intersect(grep("clam", x$species.logbook), grep("shell", x$species.logbook))]   <- "empty shells"

x$species.logbook[intersect(rep("poisson", x$species.logbook), rep(c("non", "sorte", "petit"), x$species.logbook, and = FALSE))] <- "poisson"
x$species.logbook[intersect(grep("bigorneau", x$species.logbook), grep("plusieur", x$species.logbook))] <- "whelk"
# NOTE(review): the next two statements are identical; the second has no effect.
x$species.logbook[intersect(grep("bigorneau", x$species.logbook), grep("oeuf", x$species.logbook))]     <- "whelk eggs"
x$species.logbook[intersect(grep("bigorneau", x$species.logbook), grep("oeuf", x$species.logbook))]     <- "whelk eggs"
x$species.logbook[rep(c("whelk", "egg"), x$species.logbook)]                                            <- "whelk eggs"

x$species.logbook[rep(c("skate", "egg"), x$species.logbook)]                                            <- "skate eggs"
x$species.logbook[rep(c("oeuf", "raie"), x$species.logbook)]                                            <- "skate eggs"

x$species.logbook[intersect(grep("large", x$species.logbook), grep("star", x$species.logbook))]         <- "starfish"
x$species.logbook[intersect(grep("long", x$species.logbook), grep("sponge", x$species.logbook))]        <- "sponge"
x$species.logbook[x$species.logbook == "lumpsucker"]                                                    <- "spiny lumpsucker"
x$species.logbook[intersect(grep("poule", x$species.logbook), grep("petite", x$species.logbook))]       <- "spiny lumpsucker"
x$species.logbook[intersect(grep("merluche", x$species.logbook), grep("morue", x$species.logbook))]     <- "gadiformes"


# Any entry containing "barbu", and exact "merluche" / "brosme" entries, become "white hake":
x$species.logbook[grep("barbu", x$species.logbook)] <- "white hake"
x$species.logbook[x$species.logbook %in% c("merluche", "brosme")]    <- "white hake"
x$species.logbook[grep("plaice", x$species.logbook)] <- "American plaice"

# Entries mentioning "winkle" or "whelk", but not "egg", become "whelk":
x$species.logbook[setdiff(union(grep("winkle", x$species.logbook), grep("whelk", x$species.logbook)), grep("egg", x$species.logbook))] <- "whelk"
x$species.logbook[rep("pieuvre", x$species.logbook)] <- "octopus"
x$species.logbook[rep(c("stimpson", "clam"), x$species.logbook)]   <- "Stimpson's surf clam"
x$species.logbook[rep(c("stimpson", "mactre"), x$species.logbook)] <- "Stimpson's surf clam"

x$species.logbook[grep("nerei[sd]", x$species.logbook)] <- "nereis"

# Special cases:
# These are handled before terms in parentheses are removed further below.
x$species.logbook[x$species.logbook == "flounder(plie blanche)"] <- "American plaice"
x$species.logbook[x$species.logbook == "greysole(plie grise)"]   <- "witch flounder" 
x$species.logbook[grep("grey[ ]*sole", x$species.logbook)]       <- "witch flounder" 
x$species.logbook[rep(c("lompenie", "serpent"), x$species.logbook)] <- "slender eelblenny" 

# Remove terms in parentheses:
# Drops "(...)" groups made of lowercase letters, digits, spaces, apostrophes
# and question marks, then any remaining question marks.
x$species.logbook <- gsub("\\([a-z '0-9?]+\\)", "", x$species.logbook)
x$species.logbook <- gsub("[?]", "", x$species.logbook)

# Remove unknown species:
# Entries containing "llisible" are blanked (presumably "illisible", i.e.
# illegible logbook entries - confirm).
x$species.logbook[grep("llisible", x$species.logbook)] <- ""               

# Standardize species names:
# Order-dependent: e.g. "crapeau" is corrected to "crapaud" only after the
# exact "crapaud" -> "crapaud de mer" replacement has been applied.
x$species.logbook[setdiff(grep("arctica", x$species.logbook), grep("coquille", x$species.logbook))] <- "arctica islandica"
x$species.logbook[rep(c("dollar", "sable"), x$species.logbook)] <- "sand dollar"
x$species.logbook[x$species.logbook == "crapaud"] <- "crapaud de mer"
x$species.logbook[rep(c("concombre", "mer"), x$species.logbook)] <- "concombre de mer"
x$species.logbook[x$species.logbook == "concombre"] <- "concombre de mer"
x$species.logbook[grep("corne", x$species.logbook)] <- "basketstar"
x$species.logbook[rep(c("morue", "pilote"), x$species.logbook)] <- "morue de roche"
x$species.logbook[grep("tetard", x$species.logbook)] <- "seasnail"
x$species.logbook[grep("terassier", x$species.logbook)] <- "cunner"
x$species.logbook <- gsub("crapeau", "crapaud", x$species.logbook)
x$species.logbook <- gsub("pelotte de mer", "sea mouse", x$species.logbook) 
x$species.logbook[rep(c("eponge", "main"), x$species.logbook)] <- "dead man's fingers"
x$species.logbook[rep(c("eponge", "mer"), x$species.logbook)] <- "eponge"
x$species.logbook[rep(c("eponge", "algue"), x$species.logbook)] <- "eponge"
x$species.logbook[rep(c("gosse", "mer"), x$species.logbook)] <- "sea potato"
x$species.logbook[x$species.logbook == "patate"]             <- "sea potato"

# Fix spacing issues:
x$species.logbook <- deblank(x$species.logbook)

# Empty shells:
# Entries containing "vide" together with "moule" or "clam", and any entry
# containing "coquille", are labelled "empty shells". The plural label is used
# for both statements because the species coding step only recognizes
# "empty shells"; the singular "empty shell" would be left without a code.
x$species.logbook[intersect(rep("vide", x$species.logbook), rep(c("moule", "clam"), x$species.logbook, and = FALSE))] <- "empty shells"
x$species.logbook[grep("coquille", x$species.logbook)] <- "empty shells"

# Fish species:
# Translate French and variant logbook names to standardized English names.
# Most statements match the whole entry exactly (== or %in%); rep(...) is this
# script's keyword finder and replaces any entry containing the keyword(s).
# Statements are order-dependent (e.g. "crapet de mer" -> "crapaud de mer"
# -> "sea raven").
x$species.logbook[x$species.logbook %in% c("morue", "morue franche", "atlantic cod")]     <- "cod"
x$species.logbook[x$species.logbook %in% c("tommy cod", "morue thomas")]  <- "tomcod"
x$species.logbook[x$species.logbook %in% c("morue charboniere", "morue charbonniere", "pepin", "small green cod", "rock cod", "morue de roche", "black cod")] <- "Greenland cod"

x$species.logbook[x$species.logbook == "hake"]                  <- "white hake"
x$species.logbook[x$species.logbook == "fletan"]                <- "halibut"

x$species.logbook[x$species.logbook %in% c("plie du canada", "plie blanche", "plie canadienne", "americain plaice", "plie americaine")] <- "American plaice"
x$species.logbook[x$species.logbook %in% c("plie jaune", "yellowtail", "plie a queue jaune", "limande a queue jaune")] <- "yellowtail flounder"

x$species.logbook[x$species.logbook == "plie rouge"]            <- "winter flounder"
x$species.logbook[x$species.logbook == "plie grise"]            <- "witch flounder"
x$species.logbook[x$species.logbook %in% c("plie", "place")]    <- "plaice"

# Entries naming more than one flatfish are assigned to the generic "flatfish":
x$species.logbook[x$species.logbook == "plie grise et blanche"] <- "flatfish"
x$species.logbook[x$species.logbook == "plie blanche et grise"] <- "flatfish"
x$species.logbook[x$species.logbook == "plie grise canadienne"] <- "flatfish"
x$species.logbook[x$species.logbook == "fletan du groenland"]   <- "turbot"
x$species.logbook[x$species.logbook == "small flounder"]        <- "flounder"

x$species.logbook[x$species.logbook %in% c("ocean perch", "poisson rouge", "red fish", "sebaste")] <- "redfish"
# Date-specific correction: "red" is read as "redfish" only for records dated 1994-08-24.
x$species.logbook[which(x$date == "1994-08-24" & x$species.logbook == "red")]       <- "redfish"
x$species.logbook[x$species.logbook %in% c("poisson alligator", "alligator fish")]  <- "alligatorfish"

x$species.logbook[x$species.logbook == "crapet de mer"]         <- "crapaud de mer"
x$species.logbook[x$species.logbook == "goberge"]               <- "pollock"
x$species.logbook[x$species.logbook == "poisson"]               <- "fish"
x$species.logbook[x$species.logbook == "quelque poisson"]       <- "fish"
x$species.logbook[x$species.logbook == "aiglefin"]              <- "haddock"

x$species.logbook[x$species.logbook == "capelan"]               <- "capelin"
x$species.logbook[x$species.logbook == "caplan"]                <- "capelin"
x$species.logbook[x$species.logbook %in% c("sea trout", "truite", "truite d'eau salee", "truite de mer")] <- "salmon" 
x$species.logbook[x$species.logbook == "maquereau"]             <- "mackerel"
x$species.logbook[x$species.logbook %in% c("smelt", "eperlan")] <- "rainbow smelt"
x$species.logbook[x$species.logbook == "faux trigle"]           <- "mailed sculpin"
x$species.logbook[x$species.logbook == "agone"]                 <- "sea poacher"

x$species.logbook[x$species.logbook == "poule d'eau"]           <- "Atlantic lumpfish"
x$species.logbook[x$species.logbook == "poule de mer"]          <- "Atlantic lumpfish"
x$species.logbook[x$species.logbook == "lump fish"]             <- "Atlantic lumpfish"
x$species.logbook[x$species.logbook == "loup atlantique"]       <- "Atlantic wolffish"
x$species.logbook[x$species.logbook %in% c("wolf", "loup de mer")] <- "wolffish"

x$species.logbook[x$species.logbook %in% c("baudroie", "baudroie d'amerique", "diable de mer")] <- "monkfish"
x$species.logbook[x$species.logbook == "loche"]                 <- "ocean pout"
x$species.logbook[x$species.logbook == "small & slim eelpout"]  <- "eelpout"
x$species.logbook[x$species.logbook == "lompenie"]              <- "eelblenny"

x$species.logbook[x$species.logbook == "snakeblenny"]           <- "snake blenny" 
x$species.logbook[rep("serpent blenny", x$species.logbook)]     <- "snake blenny"

x$species.logbook[x$species.logbook %in% c("sea snail")]        <- "seasnail"
x$species.logbook[x$species.logbook == "chaboisseau bronze"]     <- "shorthorn sculpin"
# NOTE(review): entries containing both "chaboisse" and "18" were already
# renamed "longhorn sculpin" by the keyword substitutions earlier in the
# script, so this statement looks unreachable - confirm which name is intended.
x$species.logbook[x$species.logbook == "chaboisseau a 18 epine"] <- "sculpin"
x$species.logbook[x$species.logbook == "chien de mer"]           <- "dogfish"
x$species.logbook[x$species.logbook == "dog fish"]               <- "dogfish"
x$species.logbook[x$species.logbook %in% c("hemitriptere", "crapaud de mer")] <- "sea raven"
x$species.logbook[x$species.logbook == "chaboisseau"]            <- "sculpin" 

x$species.logbook[rep(c("petite", "poule"), x$species.logbook)]      <- "spiny lumpsucker"
x$species.logbook[x$species.logbook == "spring lumpsucker"]          <- "spiny lumpsucker"
x$species.logbook[x$species.logbook == "spine belly lumpsucker"]     <- "spiny lumpsucker"
x$species.logbook[rep(c("chat de mer"), x$species.logbook)]          <- "fourline snakeblenny"
x$species.logbook[rep(c("catfish"), x$species.logbook)]              <- "fourline snakeblenny"
x$species.logbook[rep(c("poisson chat"), x$species.logbook)]              <- "fourline snakeblenny"
x$species.logbook[rep(c("round nose grenadier"), x$species.logbook)] <- "roundnose grenadier"
x$species.logbook[rep(c("grenadier", "roche"), x$species.logbook)]   <- "grenadier"

x$species.logbook[x$species.logbook %in% c("raie", "raie blanche", "raie grise")]   <- "skate" 
x$species.logbook[x$species.logbook %in% c("raie epineuse", "spiny skate")]         <- "thorny skate" 
x$species.logbook[x$species.logbook %in% c("raie lisse")]                           <- "smooth skate" 

# Crustaceans:
# Any entry mentioning "homard" becomes "American lobster":
x$species.logbook[grep("homard", x$species.logbook)] <- "American lobster"

# Standardized name = logbook entries (matched exactly) that map to it:
crustacean.names <- list(
   "hermit crab"         = c("bernard l'hermite", "bernard hermit", "bernard hermite"),
   "northern stone crab" = c("thorny crab", "crabe epineux"),
   "shrimp"              = c("creve", "crevette blanche", "red shrimp", "pink shrimp", "white shrimp", "crevette"),
   "Pandalus borealis"   = c("pandalus", "crevette pandalus"),
   "toad crab"           = "casque de police",
   "barnacle"            = "balane",
   "hyas araneus"        = "hyas aneanu",
   "sea spider"          = "araignee de mer",
   "crangon"             = "ecrevisse"
)
for (k in seq_along(crustacean.names)){
   x$species.logbook[x$species.logbook %in% crustacean.names[[k]]] <- names(crustacean.names)[k]
}

# Molluscs:
# Translate mollusc logbook names to standardized English names. Exact
# matches use == / %in%; grep() and rep() (this script's keyword finder)
# replace any entry containing the keyword(s).
x$species.logbook[grep("palourde noir", x$species.logbook)]  <- "arctica islandica" 
x$species.logbook[x$species.logbook %in% c("piteau")] <- "propeller clam"
x$species.logbook[x$species.logbook == "huitre"]             <- "oyster" 
x$species.logbook[x$species.logbook == "limace"]             <- "sea slug"
x$species.logbook[x$species.logbook == "limace arc en ciel"] <- "sea slug"
x$species.logbook[x$species.logbook == "mollusque"]          <- "mollusk"
x$species.logbook[x$species.logbook == "mollusque vide"]     <- "empty shells" 
x$species.logbook[x$species.logbook == "petoncle"]           <- "scallop"
x$species.logbook[x$species.logbook == "petoncle geante"]    <- "giant scallop"
x$species.logbook[x$species.logbook %in% c("iceland scallop", "petoncle d'islande")] <- "Iceland scallop"
x$species.logbook[x$species.logbook %in% c("cockle d'islande", "coque d'islande", "iceland clam")]   <- "Iceland cockle"
x$species.logbook[x$species.logbook == "buccin"]             <- "whelk"
x$species.logbook[x$species.logbook == "bigorneau"]          <- "whelk"
x$species.logbook[x$species.logbook == "oeuf de buccin"]     <- "whelk eggs" 
x$species.logbook[x$species.logbook == "oeuf de bigorneau"]  <- "whelk eggs"
# "octopu" is what the earlier plural removal leaves of "octopus":
x$species.logbook[x$species.logbook == "octopu"]             <- "octopus"
x$species.logbook[x$species.logbook == "pieuvre"]            <- "octopus"
x$species.logbook[x$species.logbook == "calmar"]             <- "squid"
x$species.logbook[x$species.logbook %in% c("sea clam", "coque noir", "palourde", "espece de palourde", "coque", "bar")] <- "clam"
x$species.logbook[x$species.logbook == "oeuf de palourde"]   <- "clam eggs" 
x$species.logbook[x$species.logbook == "moule noir"]         <- "mussel"
x$species.logbook[x$species.logbook == "moule"]              <- "mussel"
# Entries mentioning "ecaille" with "palou..." or "peton..." are treated as empty shells:
x$species.logbook[rep(c("ecaille", "palou"), x$species.logbook)] <- "empty shells"
x$species.logbook[rep(c("ecaille", "peton"), x$species.logbook)] <- "empty shells"
# NOTE(review): this replaces "snail" with "snail" and so has no effect.
x$species.logbook[x$species.logbook %in% c("snail")]             <- "snail"

# Miscellaneous invertebrates:
# Translate remaining invertebrate logbook names to standardized English
# names. Statements are order-dependent where one replacement feeds another.
# NOTE(review): "copepode(pou de mer)" can no longer occur at this point if
# the earlier removal of terms in parentheses matched it - confirm.
x$species.logbook[x$species.logbook %in% c("copepode", "copepode(pou de mer)", "pou de mer", "poux de mer", "puce de mer", "puceron")] <- "amphipod"

x$species.logbook[x$species.logbook %in% c("poisson du soleil", "meduse", "poisson de soleil", "soleil de mer")] <- "jellyfish"  
x$species.logbook[x$species.logbook == "sponge"]             <- "sea sponge" 
x$species.logbook[rep("sea star", x$species.logbook)]        <- "starfish"
x$species.logbook[x$species.logbook %in% c("sun star", "solaster", "etoile solaster")]  <- "sunstar" 
x$species.logbook[x$species.logbook == "star fish"]          <- "starfish"
x$species.logbook[x$species.logbook == "etoile de"]          <- "starfish"
x$species.logbook[x$species.logbook == "anemone"]            <- "sea anemone"  
x$species.logbook[x$species.logbook %in% c("dollar", "dollar de mer")] <- "sand dollar"
x$species.logbook[x$species.logbook == "etoile"]             <- "starfish"
x$species.logbook[x$species.logbook == "holothurie"]         <- "sea cucumber"
x$species.logbook[x$species.logbook == "concombre de mer"]   <- "sea cucumber"
x$species.logbook[x$species.logbook %in%  c("oursin", "oursin vert")]  <- "sea urchin"
x$species.logbook[x$species.logbook == "oursin de mer"]      <- "sea urchin"
x$species.logbook[x$species.logbook == "ophiure"]            <- "brittle star"
x$species.logbook[x$species.logbook == "ophiure de mer"]     <- "brittle star"
x$species.logbook[x$species.logbook == "eponge"]             <- "sea sponge"
# Two-step replacement: "ophiure et eponge" is first reordered to
# "eponge et ophiure", which the following statement then translates.
x$species.logbook[x$species.logbook == "ophiure et eponge"]  <- "eponge et ophiure"
x$species.logbook[x$species.logbook == "eponge et ophiure"]  <- "sponges and brittlestars"
x$species.logbook[x$species.logbook == "etoile eponge"]      <- "sponges and starfish"
x$species.logbook[x$species.logbook == "mousse"]             <- "bryozoan" 
x$species.logbook[grep("balane", x$species.logbook)]         <- "barnacle"  
x$species.logbook[grep("corail", x$species.logbook)]         <- "coral"  
x$species.logbook[grep("coraux", x$species.logbook)]         <- "coral"  

x$species.logbook[grep("patate de mer", x$species.logbook)]  <- "sea potato"   
# NOTE(review): the same exact-match statement also appears in the name
# standardization section earlier in the script.
x$species.logbook[x$species.logbook == "patate"]             <- "sea potato"   

x$species.logbook[grep("cocon", x$species.logbook)]          <- "whelk eggs"  
x$species.logbook[grep("sea mice", x$species.logbook)]       <- "sea mouse"  
x$species.logbook[x$species.logbook %in% c("souris de mer")] <- "sea mouse" 
# Leech and worm entries are all grouped under "annelid":
x$species.logbook[x$species.logbook %in% c("oeuf de sangsue", "larve de sangsue", "leech", "sea centipede", "ver", "ver turbicole", "sea worm", "ver de mer", "verre de mer")] <- "annelid"  

# Algae:
# Generic seaweed names (exact matches) become "algae":
algae.names <- c("algue marine", "algue verte", "algue", "kelp", "seaweed")
x$species.logbook[x$species.logbook %in% algae.names] <- "algae"

# Any entry mentioning "laminaire" or "laminaria" (which includes
# "algue laminaria") becomes "laminaria":
x$species.logbook[grep("laminaire|laminaria", x$species.logbook)] <- "laminaria"

# Delete nonsense entries:
ix <- which(x$species.logbook == "cracha d'admiral")
if (length(ix) > 0) x <- x[-ix, ]

# Initialize species coding:
x$species <- NA

# Fish species coding:
# Codes are assigned in three passes, in this order: exact names, then names
# matched by pattern anywhere in the entry, then a second set of exact names.

# First pass: exact logbook name = species code.
fish.codes <- c(
   "cod"                 = 10,
   "tomcod"              = 17,
   "haddock"             = 11,
   "pollock"             = 16,
   "gadiformes"          = 18,   # "Gadiformes"
   "halibut"             = 30,
   "Greenland cod"       = 118,
   "mackerel"            = 70,
   "flounder"            = 49,
   "skipjack"            = 172,
   "flatfish"            = 346,
   "turbot"              = 31,
   "salmon"              = 980,  # "Salmon, trouts, etc."
   "grenadier"           = 416,
   "roundnose grenadier" = 414,
   "sea raven"           = 320,
   "thorny skate"        = 201
)
for (k in seq_along(fish.codes)){
   x$species[x$species.logbook == names(fish.codes)[k]] <- fish.codes[[k]]
}

# Second pass: regular expression found anywhere in the entry = species code.
fish.patterns <- c(
   "gasp[ea]reau"    = 62,
   "redfish"         = 20,
   "American plaice" = 40,
   "winter flounder" = 43,
   "witch flounder"  = 41,
   "yellowtail"      = 42,
   "white hake"      = 12,
   "red hake"        = 13,
   "rainbow smelt"   = 63,
   "lumpfish"        = 501,
   "cunner"          = 122,
   "herring"         = 60,
   "monkfish"        = 400
)
for (k in seq_along(fish.patterns)){
   x$species[grep(names(fish.patterns)[k], x$species.logbook)] <- fish.patterns[[k]]
}

# Third pass: exact logbook name = species code.
fish.codes <- c(
   "skate"                    = 211,  # Skates unsp.
   "smooth skate"             = 202,
   "plaice"                   = 346,  # Flatfish unsp.
   "fish"                     = 90,   # Unsp. fish
   "sculpin"                  = 311,  # Sculpin unsp.
   "mailed sculpin"           = 304,
   "sea poacher"              = 350,
   "capelin"                  = 64,
   "dogfish"                  = 274,
   "alligatorfish"            = 340,
   "Atlantic wolffish"        = 50,
   "wolffish"                 = 59,   # "Wolffish, unsp."
   "shorthorn sculpin"        = 301,
   "spatulate sculpin"        = 314,
   "three-spined stickleback" = 361,
   "longhorn sculpin"         = 300,
   "ocean pout"               = 640,
   "slender eelblenny"        = 631,
   "American eel"             = 600,
   "anguille"                 = 634,  # "Unsp. eels"
   "eel"                      = 634,  # "Unsp. eels"
   "fish doctor"              = 616,
   "eelpout"                  = 598,  # "Eelpouts unsp."
   "spiny lumpsucker"         = 502,
   "snake blenny"             = 622,
   "fourline snakeblenny"     = 626,
   "dusky seasnail"           = 512,
   "seasnail"                 = 500,  # "Seasnail unsp."
   "gelatinous seasnail"      = 505,  # "Seasnail, gelatinous"
   "skate eggs"               = 1224
)
for (k in seq_along(fish.codes)){
   x$species[x$species.logbook == names(fish.codes)[k]] <- fish.codes[[k]]
}

# Crustacean species coding:
# Exact logbook name = species code.
crustacean.codes <- c(
   "American lobster"    = 2550,
   "rock crab"           = 2513,
   "crabe commun"        = 2513,
   "crabe de roche"      = 2513,
   "hyas"                = 2520,
   "tiger shrimp"        = 2100,
   "shrimp"              = 2100,
   "hyas araneus"        = 2527,
   "hyas coarctatus"     = 2521,
   "toad crab"           = 2520,
   "northern stone crab" = 2523,
   "Pandalus borealis"   = 2210,
   "crangon"             = 2400   # Crangonidae f.
)
for (k in seq_along(crustacean.codes)){
   x$species[x$species.logbook == names(crustacean.codes)[k]] <- crustacean.codes[[k]]
}

# Any entry containing "hermit crab" (none of the exact names above does, so
# the position of this statement relative to the table does not matter):
x$species[grep("hermit crab", x$species.logbook)] <- 2560

# Molluscan species coding:
# Exact logbook name = species code.
mollusc.codes <- c(
   "propeller clam"       = 4310,  # Clams unsp.
   "couteau"              = 4315,
   "razor clam"           = 4315,
   "bar clam"             = 4317,
   "sea slug"             = 4400,
   "snail"                = 4200,  # "Snails and slugs"
   "mollusk"              = 4000,  # "Mollusca p."
   "whelk"                = 4210,
   "whelk eggs"           = 1510,
   "bivalve"              = 4300,  # "Bivalvia c."
   "empty shells"         = 4348,
   "arctica islandica"    = 4304,  # Ocean quahog
   "clam"                 = 4310,  # "Clams unsp."
   "scallop"              = 4320,  # Scallop
   "giant scallop"        = 4321,  # Giant scallop
   "Iceland scallop"      = 4322,  # Iceland scallop
   "squid"                = 4514,  # "Squid unsp."
   "octopus"              = 4520,
   "cockle"               = 4340,
   "mussel"               = 4330,
   "Iceland cockle"       = 4342,
   "Stimpson's surf clam" = 4355,
   "oyster"               = 4326   # "American cupped oyster"
)
for (k in seq_along(mollusc.codes)){
   x$species[x$species.logbook == names(mollusc.codes)[k]] <- mollusc.codes[[k]]
}

# Miscellaneous species coding:
# Exact logbook name = species code.
misc.codes <- c(
   "nematode"                 = 7100,
   "nereis"                   = 3130,
   "nereis virens"            = 3130,
   "annelid"                  = 3000,
   "sea potato"               = 1823,
   "bryozoan"                 = 1900,
   "sunstar"                  = 6121,  # Solaster
   "sea mouse"                = 3200,
   "amphipod"                 = 2801,
   "barnacle"                 = 2990,
   "sea urchin"               = 6400,
   "sand dollar"              = 6500,
   "sea cucumber"             = 6600,
   "brittle star"             = 6200,
   "basketstar"               = 6300,
   "starfish"                 = 6100,  # "Asteroidea s.c."
   "sponges and brittlestars" = 1701,  # "Marine invertebrates unsp."
   "sea sponge"               = 8600,
   "laminaria"                = 9321,
   "algae"                    = 9300,
   "sea anemone"              = 8300,
   "polyp"                    = 8200,
   "polyps"                   = 8200,
   "coral"                    = 8530,  # "Sea corals unsp."
   "dead man's fingers"       = 8336,
   "jellyfish"                = 8500,
   "sea spider"               = 5100,
   "sponges and starfish"     = 1701,
   "clam eggs"                = 1500   # "Mollusc eggs unsp."
)
for (k in seq_along(misc.codes)){
   x$species[x$species.logbook == names(misc.codes)[k]] <- misc.codes[[k]]
}

# Any entry containing "irish moss":
x$species[grep("irish moss", x$species.logbook)] <- 9332

# Show the standardized names, and the records, that received no species code
# (top-level expressions: only displayed when run interactively or echoed):
unique(x$species.logbook[is.na(x$species)])
x[which(is.na(x$species)), ]

# Re-order fields:
# Drop the date components and the working name field, then put the key
# fields first and the comments last.
remove <- c("year", "month", "day", "species.logbook")
keep <- setdiff(names(x), remove)
vars <- c("date", "tow.id", "tow.number", "species", "species.name.logbook")
x <- x[keep]
x <- x[c(vars, setdiff(keep, c(vars, "comment")), "comment")]

# Delete lines with missing IDs:
# Doublecheck these in the logbook:
# Records with no species code, no (or zero) number caught and an empty
# logbook name carry no information and are removed. The field is referenced
# by its full name 'species.name.logbook' rather than relying on partial
# matching of 'x$species.name', which would silently select nothing if the
# partial match ever failed.
ix <- which(is.na(x$species) & (is.na(x$number.caught) | x$number.caught == 0) & (x$species.name.logbook == ""))
if (length(ix) > 0) x <- x[-ix, ]

# Remaining uncoded records with an empty logbook name (deletion is disabled;
# 'ix' is only computed for inspection):
ix <- which(is.na(x$species) & x$species.name.logbook == "")
#if (length(ix) > 0) x <- x[-ix, ]

# Remove tow.id:
# (field names are unique at this point, so a logical column selection is
# equivalent to selecting by name)
x <- x[names(x) != "tow.id"]

# Fix tow.id format (disabled):
#x$tow.id <- paste0("GP", gsub(" ", "0", formatC(as.numeric(gsub("S89", "", x$tow.id)), width = 3)))

# Fix index key:
# Records sharing the same (date, tow.number, species) key are merged into the
# first record of the group, and the other records of the group are dropped:
#   - number.caught : summed when at least one value is numeric; otherwise the
#                     non-numeric entries are pasted together with " + ";
#   - species.name.logbook : unique names pasted with " & ";
#   - comment : unique comments pasted with "; ".
# Keys containing NA never compare equal, so such records are left untouched.
ux <- unique(x[c("date", "tow.number", "species")])
remove <- NULL
for (i in seq_len(nrow(ux))){   # seq_len() is safe when there are no keys.
   ix <- which((x$date == ux$date[i]) & (x$tow.number == ux$tow.number[i]) & (x$species == ux$species[i]))
   if (length(ix) > 1){
      print(x[ix, ])
      counts <- as.numeric(x$number.caught[ix])
      if (all(is.na(counts))){
         # No numeric counts: keep the text entries as a " + " separated list.
         s <- x$number.caught[ix]
         s[is.na(s)] <- ""
         s <- paste(s, collapse = " + ")
         # Strip every trailing empty term, however many there are:
         s <- gsub("( [+] )+$", "", s)
      }else{
         s <- sum(counts, na.rm = TRUE)
      }  
      print(s)
      x$number.caught[ix[1]] <- s
      x$species.name.logbook[ix[1]] <- paste(unique(x$species.name.logbook[ix]), collapse = " & ")
      x$comment[ix[1]] <- paste(unique(x$comment[ix]), collapse = "; ")
      remove <- c(remove, ix[2:length(ix)])
   }
}
if (length(remove) > 0) x <- x[-remove, ]


# Comment clean-up:
# Remove a leading "; " first, then a trailing ";" with any spaces after it
# (separators left over from pasting comments together).
x$comment <- gsub(";[ ]*$", "", gsub("^; ", "", x$comment))

# Tidy the spacing of the saved logbook names:
x$species.name.logbook <- gulf.utils::deblank(x$species.name.logbook)

# Write data:
# The output folder is built from the part of the working directory that
# precedes the first occurrence of "gulf".
path <- paste0(unlist(strsplit(getwd(), "gulf"))[1], "gulf.data/inst/extdata/")
write.csv(x, file = paste0(path, paste0("scs.cat.", year, ".csv")), row.names = FALSE)

# Report the logbook names and records left without a species code. The
# 'species.logbook' field was dropped when the fields were re-ordered, so the
# retained 'species.name.logbook' field is used here (referencing the dropped
# field raised an "undefined columns selected" error).
print(unique(x[which(is.na(x$species)), "species.name.logbook"]))
print(x[is.na(x$species), ])

# Summary of the logbook names assigned to each species:
print(aggregate( x$species.name.logbook, by = list(species(x$species)), function(x) paste(unique(x), collapse = "; ")))
# TobieSurette/gulf.manage documentation built on Feb. 10, 2024, 10:51 p.m.