# Show the source of every code chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# epiuf is presumably where verifySpelling(), externalFile() and freq() used
# below come from -- TODO confirm.
library(epiuf)
# kableExtra is attached for table formatting; none of its functions is
# called by name in the visible code.
library(kableExtra)

Epifunction : verifySpelling

This document shows examples of how to use this epiFunction.

# Resolve the example file through externalFile() (presumably the path of a
# file shipped with the package -- TODO confirm), then load it.
ext_file <- externalFile("testbrand.csv")
# The file was created in UTF-8, so the encoding is declared on read.
df <- read.csv(file = ext_file, fileEncoding = "UTF-8")

We use a list of brand names as the reference against which incorrectly spelled values are compared

# Reference spellings that observed brand values are compared against.
# Every entry must itself be spelled correctly, because a matched value is
# replaced by the reference entry: "Janssen" (not "Jansen") and "Vaxzevria"
# (capitalised) agree with the spellings used in BrandList further down.
BrandName <- c(
  "Pfizer", "Moderna", "Comirnaty", "AstraZeneca", "Spikevax",
  "Janssen", "Comirnaty (Pfizer/BioNTech)", "Vaxzevria",
  "Johnson & Johnson", "Pfizer/BioNTech Comirnaty"
)

# Check the single misspelled value "Maderna" against the BrandName reference
# vector; presumably this returns the closest reference spelling -- TODO confirm.
verifySpelling("Maderna", BrandName)

We can apply verifySpelling to a whole column to add a new column containing the guessed correct value to the dataset

# Run every first-dose brand value through verifySpelling() against the
# BrandName reference and keep the result in a new column.
# NOTE(review): ErrPerc looks like the tolerated share of differing
# characters -- confirm against the epiuf documentation.
df$correctBrand <- verifySpelling(
  df$covvaccbrand_firstdose,
  BrandName,
  ErrPerc = 0.4
)
# Preview the first ten rows.
head(df, n = 10)

We can also use a named list of brand names to regroup values, like a recode (except that this recode accepts similar values instead of only equal values)

# Named character vector: each value is an accepted spelling and its name is
# the group the spelling is recoded to. Names repeat on purpose, one per
# spelling that belongs to the same group.
BrandList <- c(
  # Pfizer group
  Pfizer = "Pfizer",
  Pfizer = "Pfizer/BioNTech Comirnaty",
  Pfizer = "Comirnaty (Pfizer/BioNTech)",
  Pfizer = "Comirnaty",
  # Moderna group
  Moderna = "Moderna",
  Moderna = "Spikevax",
  # AstraZeneca group
  AstraZeneca = "AstraZeneca",
  AstraZeneca = "Vaxzevria",
  # Groups with a single spelling
  Janssen = "Janssen",
  `Johnson & Johnson` = "Johnson & Johnson"
)


# Same call as for the plain reference vector, but with the named BrandList
# as reference; the result is kept in a second new column.
df$recodedBrand <- verifySpelling(
  df$covvaccbrand_firstdose,
  BrandList,
  ErrPerc = 0.4
)
# Preview the first ten rows.
head(df, n = 10)

All values that do not match are set to NA, so we can check them

# Keep only the rows whose correctBrand is NA.
dfError <- df[is.na(df[["correctBrand"]]), ]
# Render those rows as a table without row names.
knitr::kable(x = dfError, row.names = FALSE)
# Summarise the recoded brand of those rows with freq(), asking it to include
# missing values (presumably a frequency table -- TODO confirm).
freq(dfError$recodedBrand, missing = TRUE)

The result is saved to:

# Export the unmatched rows as CSV.
# NOTE(review): the path is relative to the current working directory, so a
# "vignettes" folder must exist there -- confirm where this is run from.
write.csv(x = dfError, file = "vignettes/Brand Error.csv")


Epiconcept-Paris/STRAP-epiuf documentation built on Aug. 5, 2024, 3:41 a.m.