# R/clean_data.R

# Register the bare column names that the functions in this file use inside
# dplyr verbs (non-standard evaluation), so that R CMD check does not report
# them as undefined global variables.
globalVariables(c("DAY", "MONTH","LATITUDE","LONGITUDE","EQ_PRIMARY","LOCATION_NAME","TOTAL_DEATHS","d","YEAR","COUNTRY"))

#' Clean raw NOAA earthquake data
#'
#' Converts the columns of the raw NOAA Significant Earthquake Database to the
#' types needed by later analysis steps and builds a \code{DATE} column that
#' also covers BCE (negative) years.
#'
#' The function reads the following columns of \code{data}:
#' \itemize{
#'  \item DAY, MONTH: missing values are replaced by 1
#'  \item YEAR: negative values are treated as BCE years
#'  \item LATITUDE, LONGITUDE, EQ_PRIMARY, TOTAL_DEATHS: coerced to numeric
#' }
#' All integer columns are converted to double first. Two columns are added:
#' \code{date}, the \code{"day/month/year"} string the date is parsed from,
#' and \code{DATE}, the resulting \code{Date}.
#'
#' @param data A data frame with NOAA Significant Earthquake Database
#' @return The input data frame with the conversions above applied and the
#'   columns \code{date} and \code{DATE} appended. When at least one negative
#'   year needed zero-padding, \code{YEAR} is returned as character with those
#'   years padded to four digits (for example \code{"-0005"}).
#' @importFrom dplyr %>%
#' @export
#' @examples
#' library(dplyr); library(lubridate)
#' NOAA_quakes %>%
#'   clear_data()

clear_data <- function(data) {
  q1 <- dplyr::mutate_if(data, is.integer, as.numeric)
  q1 <- dplyr::mutate(
    q1,
    DAY = dplyr::if_else(is.na(DAY), 1, DAY),
    MONTH = dplyr::if_else(is.na(MONTH), 1, MONTH),
    LATITUDE = as.numeric(LATITUDE),
    LONGITUDE = as.numeric(LONGITUDE),
    EQ_PRIMARY = as.numeric(EQ_PRIMARY),
    TOTAL_DEATHS = as.numeric(TOTAL_DEATHS)
  )

  # Decide which rows are BCE once, on the numeric value. The padding below
  # turns YEAR into character, after which `YEAR < 0` would be a
  # locale-dependent string comparison. Rows with a missing YEAR are treated
  # as non-BCE and end up with an NA DATE.
  year_num <- as.numeric(q1$YEAR)
  is_bce <- !is.na(year_num) & year_num < 0

  # Zero-pad short negative years to four digits ("-5" -> "-0005").
  # Years that already have four or more digits are left untouched.
  year_chr <- as.character(q1$YEAR)
  needs_pad <- is_bce & nchar(year_chr) < 5
  if (any(needs_pad)) {
    digits <- sub("^-", "", year_chr[needs_pad])
    year_chr[needs_pad] <- paste0("-", strrep("0", 4L - nchar(digits)), digits)
    q1$YEAR <- year_chr
  }

  q1 <- dplyr::mutate(q1, date = paste0(DAY, "/", MONTH, "/", YEAR))

  # Days since 1970-01-01 for every row.
  days <- numeric(nrow(q1))
  days[!is_bce] <- as.numeric(
    as.Date(q1$date[!is_bce], format = "%d/%m/%Y")
  )
  if (any(is_bce)) {
    # BCE rows: parse the padded string with lubridate::dmy() and reflect the
    # result about 0000-01-01 (2 * year_zero - parsed).
    # Presumably dmy() skips the "-" as a separator and reads the year as
    # positive, which is what makes the reflection land before year zero.
    # NOTE(review): the reflection mirrors the month/day offset as well as the
    # year, so BCE dates are only approximate within the year; kept as is for
    # backward compatibility.
    year_zero <- as.numeric(lubridate::ymd("0000-1-1"))
    parsed <- as.numeric(lubridate::dmy(q1$date[is_bce]))
    days[is_bce] <- 2 * year_zero - parsed
  }

  q1$DATE <- as.Date(days, origin = "1970-01-01")
  q1
}

#' Clean the location name
#'
#' Removes the \code{"<COUNTRY>:"} prefix from \code{LOCATION_NAME}, trims
#' surrounding whitespace and converts the result to title case.
#'
#' @param data  A data frame with NOAA Significant Earthquake Database; the
#'   columns \code{COUNTRY} and \code{LOCATION_NAME} are used.
#' @return \code{data} with \code{LOCATION_NAME} stripped of the country
#'   prefix, trimmed and title-cased.
#' @importFrom dplyr %>%
#' @export
#'
#' @examples
#' data(NOAA_quakes)
#' clean_LC<-eq_location_clean(NOAA_quakes)
eq_location_clean <- function(data) {
  # Remove every literal "<country>:" occurrence from one location string.
  # fixed = TRUE matters: country names such as "MYANMAR (BURMA)" contain
  # regex metacharacters and would otherwise never match their own prefix.
  strip_country <- function(country, location) {
    gsub(paste0(country, ":"), "", location, fixed = TRUE)
  }

  # Return the mutated data frame visibly (no trailing assignment).
  dplyr::mutate(
    data,
    LOCATION_NAME = purrr::map2_chr(COUNTRY, LOCATION_NAME, strip_country),
    LOCATION_NAME = stringr::str_trim(LOCATION_NAME),
    LOCATION_NAME = stringr::str_to_title(LOCATION_NAME)
  )
}


#' Fully clean the NOAA dataset
#'
#' Runs the complete cleaning pipeline: \code{clear_data()} followed by
#' \code{eq_location_clean()}.
#'
#' @param data  A data frame with NOAA Significant Earthquake Database
#' @return The data frame returned by \code{eq_location_clean()} applied to
#'   the output of \code{clear_data()}, ready for further analysis.
#' @export
#' @importFrom dplyr %>%
#' @examples
#' data(NOAA_quakes)
#' prepared_data<-eq_clean_data(NOAA_quakes)
eq_clean_data <- function(data) {
  # Return the result visibly; the original ended on an assignment, which
  # made the value invisible at the console.
  dated <- clear_data(data)
  eq_location_clean(dated)
}
# jyjek/jyjekNOAA documentation built on May 7, 2019, 10:52 p.m.