# utilities ---------------------------------------------------------------
#' Filters a gdelt data frame to specified sources
#'
#' Keeps only the rows of a GKG/VGKG data frame whose \code{documentSource}
#' matches any of the given source names (case-insensitive substring match).
#'
#' @param data gkg or vgkg data frame; must contain a \code{documentSource} column
#' @param sources character vector of source names to keep
#'
#' @return \code{data} filtered to rows whose \code{documentSource} matches
#'   any entry of \code{sources}
#' @export
#'
#' @examples
#' \dontrun{
#' filter_sources(gkg_data, sources = c('techcrunch', 'bloomberg'))
#' }
filter_sources <-
  function(data,
           sources = c(
             'netsdaily',
             'realdeal',
             'curbed',
             'law360',
             'dailymail',
             'wsj.com',
             'pehub',
             'techcrunch',
             'washingtonpost',
             'bloomberg',
             'archdaily',
             'espn.com',
             'venturebeat'
           )) {
    # Validate input before doing any work
    if (!'documentSource' %in% names(data)) {
      stop("Data must contain the documentSource column")
    }
    # Build one case-insensitive alternation pattern, e.g. "curbed|law360|...".
    # unique() guards against duplicated entries inflating the pattern.
    sources <-
      sources %>%
      str_to_lower() %>%
      unique() %>%
      paste0(collapse = '|')
    data <-
      data %>%
      filter(documentSource %>% str_detect(sources))
    return(data)
  }
# Internal helper: reshapes long-format (item/value) gkg data to wide format.
#
# Splits the digits embedded in each `item` label into a `numberItem` column
# ("idADM1..." labels get the "idADM1" prefix stripped before number parsing),
# strips leading/trailing digits from the labels so duplicates collapse,
# spreads item/value pairs into columns, then coerces columns whose names
# look numeric (score/count/amount/...) to numbers.
# NOTE(review): uses tidyr::spread() and dplyr::funs(), both superseded or
# deprecated in current tidyverse releases.
.resolve_long_names <-
  function(data) {
    data <-
      data %>%
      mutate(
        # ADM1 items carry their number after the "idADM1" prefix; everything
        # else has the number embedded directly in the label
        numberItem = ifelse(
          item %>% str_detect("idADM"),
          item %>% str_replace_all("idADM1", '') %>% as.character() %>% parse_number(),
          item %>%
            as.character() %>%
            parse_number()
        ),
        # labels with no digits parse to NA -> treat as item 0
        numberItem = ifelse(numberItem %>% is.na(), 0 , numberItem),
        # strip leading/trailing digits so duplicate items collapse in distinct()
        item = item %>% str_replace_all("^\\d+|\\d+$", '')
      ) %>%
      distinct() %>%
      suppressWarnings()
    # pivot item/value pairs into one column per item
    data <-
      data %>%
      spread(item, value) %>%
      suppressWarnings()
    # coerce numeric-looking columns (by name prefix) to numbers
    data <-
      data %>%
      mutate_at(data %>% select(
        dplyr::matches(
          "^score|^count|^amount|^value|^face|^angle|^latitude|^longitude|^day|^month|^year|^idTypeLocation"
        )
      ) %>% names,
      funs(. %>% as.character() %>% parse_number()))
    return(data)
  }
#' Loads needed packages
#'
#' Attaches every package in \code{required_packages} that is not already
#' on the search path.
#'
#' @param required_packages character vector of package names to load
#'
#' @return invisibly returns the character vector of packages that were
#'   newly attached (empty if all were already loaded)
#' @export
#'
#' @examples
#' load_needed_packages(c('magrittr', 'dplyr'))
#'
load_needed_packages <- function(required_packages = c('dplyr')) {
  # search() entries look like "package:dplyr"; strip the prefix to compare
  loaded_packages <- gsub("package:", "", search())
  package_to_load <- required_packages[!required_packages %in%
                                         loaded_packages]
  if (length(package_to_load) > 0) {
    lapply(package_to_load, library, character.only = TRUE)
  }
  invisible(package_to_load)
}
# logs --------------------------------------------------------------------
#' Loads gdelt v2 Global Knowledge Graph master log data, updated every 15 minutes
#'
#' Downloads and parses the GDELT v2 master file list into one row per
#' published file.
#'
#' @return a \code{tibble} with the file id, md5 hash, download URL, raw
#'   timestamp, file name/type, zip flag, and the file's date-time
#'   (parsed as UTC by \code{ymd_hms}) converted to the session timezone
#' @export
#' @import dplyr
#' @import stringr
#' @importFrom lubridate ymd_hms
#' @importFrom lubridate with_tz
#' @importFrom readr read_tsv
#' @importFrom magrittr %>%
#' @importFrom tidyr separate
#' @examples
#' get_urls_gkg_15_minute_log()
get_urls_gkg_15_minute_log <- function() {
  url <-
    'http://data.gdeltproject.org/gdeltv2/masterfilelist.txt'
  # Each row of the master list is "<id> <hash> <url>" separated by spaces
  log_df <-
    url %>%
    read_tsv(col_names = F) %>%
    separate(
      col = X1,
      into = c('idFile', 'idHash', 'urlData'),
      sep = '\\ '
    ) %>%
    suppressWarnings() %>%
    suppressMessages()
  # File names encode "timestamp.name.type.zip"; split into components
  log_df <-
    log_df %>%
    dplyr::mutate(dateTimeFile = urlData %>% str_replace_all('http://data.gdeltproject.org/gdeltv2/', '')) %>%
    separate(dateTimeFile,
             into = c('timestamp', 'nameFile', 'typeFile', 'isZip'))
  log_df <-
    log_df %>%
    dplyr::mutate(
      # timestamps are numeric YYYYMMDDHHMMSS; convert to local timezone
      dateTimeData = timestamp %>% as.numeric() %>% lubridate::ymd_hms() %>% lubridate::with_tz(Sys.timezone()),
      dateData = dateTimeData %>% as.Date(),
      typeFile = typeFile %>% str_to_lower(),
      idFile = idFile %>% as.integer()
    ) %>%
    dplyr::mutate_at(.vars = c('idHash', 'nameFile', 'urlData'),
                     .funs = str_trim) %>%
    suppressWarnings()
  return(log_df)
}
#' Gets GDELT Event data, by year from 1979-2005, by year month 2006 - 2013, then by day
#'
#' @param return_message if \code{TRUE}, prints a summary of the URLs retrieved
#'
#' @return a \code{tibble} of GDELT Events database file URLs with file
#'   hashes, dates (for daily files), and zip-file indicators
#' @import stringr
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @importFrom tidyr separate
#' @importFrom lubridate ymd
#' @export
#'
#' @examples
#' get_urls_gdelt_event_log()
get_urls_gdelt_event_log <- function(return_message = T) {
  url <-
    'http://data.gdeltproject.org/events/md5sums'
  # md5sums rows are "<hash> <file>"; build absolute download URLs
  urlData <-
    url %>%
    read_tsv(col_names = F) %>%
    separate(col = X1,
             into = c('idHash', 'stemData'),
             sep = '\\ ') %>%
    dplyr::mutate(
      urlData = 'http://data.gdeltproject.org/events/' %>% paste0(stemData),
      slugDatabaseGDELT = 'EVENTS',
      isZipFile = ifelse(stemData %>% str_detect(".zip"), T, F)
    ) %>%
    suppressWarnings() %>%
    suppressMessages()
  # Stems look like "YYYYMMDD.export.CSV.zip" (daily) or
  # "YYYY.csv.zip" / "YYYYMM.csv.zip" (older yearly/monthly files)
  urlData <-
    urlData %>%
    separate(
      col = stemData,
      into = c('periodData', 'nameFile', 'typeFile', 'zip_file'),
      sep = '\\.'
    ) %>%
    dplyr::select(-zip_file) %>%
    dplyr::mutate(
      periodData = ifelse(periodData == 'GDELT', typeFile, periodData),
      # 8-character periods (YYYYMMDD) identify daily files
      isDaysData = ifelse(periodData %>% nchar == 8, T, F)
    ) %>%
    dplyr::select(-c(nameFile, typeFile)) %>%
    suppressWarnings()
  # Parse dates for daily files; period-level files get NA dates
  urlData <-
    urlData %>%
    dplyr::filter(isDaysData == F) %>%
    dplyr::mutate(dateData = NA) %>%
    bind_rows(
      urlData %>%
        dplyr::filter(isDaysData == T) %>%
        dplyr::mutate(dateData = periodData %>% lubridate::ymd() %>% as.Date())
    ) %>%
    dplyr::select(idHash,
                  dateData,
                  isZipFile,
                  isDaysData,
                  urlData,
                  everything())
  if (return_message) {
    count.files <-
      urlData %>%
      nrow
    min.date <-
      urlData$dateData %>% min(na.rm = T)
    max.date <-
      urlData$dateData %>% max(na.rm = T)
    # These are Events database URLs (the old message wrongly said
    # "Global Knowledge Graph")
    "You got " %>%
      paste0(count.files,
             ' GDELT Event URLs from ',
             min.date,
             ' to ',
             max.date) %>%
      cat(fill = T)
  }
  return(urlData)
}
#' Gets most recent GKG log URLs
#'
#' Downloads and parses GDELT's "last update" pointer file, which lists
#' the most recently published v2 files.
#'
#' @return a \code{tibble} with the file id, md5 hash, download URL,
#'   raw timestamp, file name/type, and zip flag for each current file
#' @import stringr
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @importFrom tidyr separate
#' @export
#'
#' @examples
#' get_urls_gkg_most_recent_log()
get_urls_gkg_most_recent_log <- function() {
  # lastupdate.txt rows are "<id> <hash> <url>" separated by spaces
  log_df <-
    'http://data.gdeltproject.org/gdeltv2/lastupdate.txt' %>%
    read_tsv(col_names = F) %>%
    separate(
      col = X1,
      into = c('idFile', 'idHash', 'urlData'),
      sep = '\\ '
    ) %>%
    suppressWarnings() %>%
    suppressMessages()
  # File names encode "timestamp.name.type.zip"; split into components
  log_df <-
    log_df %>%
    dplyr::mutate(dateTimeFile = urlData %>% str_replace_all('http://data.gdeltproject.org/gdeltv2/', '')) %>%
    separate(dateTimeFile,
             into = c('timestamp', 'nameFile', 'typeFile', 'isZip')) %>%
    dplyr::mutate(
      typeFile = typeFile %>% str_to_lower(),
      isZip = ifelse(isZip %>% str_detect("ZIP|zip"), T, F),
      idFile = idFile %>% as.integer()
    ) %>%
    # .vars/.funs form matches get_urls_gkg_15_minute_log(); dplyr::funs()
    # is deprecated
    mutate_at(.vars = c('idHash', 'nameFile', 'urlData'),
              .funs = str_trim)
  return(log_df)
}
#' Gets Global Knowledge Graph summary files by day since April 2013
#'
#' @param remove_count_files if \code{TRUE}, drops the "gkgcounts" files
#'   and keeps only the main gkg files
#' @param return_message if \code{TRUE}, prints a summary of the URLs retrieved
#'
#' @return a \code{tibble} of daily GKG file URLs with hashes, dates,
#'   and zip/count-file flags
#' @export
#' @import stringr
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @importFrom lubridate ymd
#' @importFrom tidyr separate
#' @examples
#' get_urls_gkg_daily_summaries(remove_count_files = T)
get_urls_gkg_daily_summaries <-
  function(remove_count_files = F,
           return_message = T) {
    url <-
      'http://data.gdeltproject.org/gkg/md5sums'
    # md5sums rows are "<hash> <file>"; stems look like
    # "YYYYMMDD.gkg.csv.zip" or "YYYYMMDD.gkgcounts.csv.zip"
    urlData <-
      url %>%
      read_tsv(col_names = F) %>%
      separate(col = X1,
               into = c('idHash', 'stemData'),
               sep = '\\ ') %>%
      dplyr::mutate(urlData = 'http://data.gdeltproject.org/gkg/' %>% paste0(stemData),
                    slugDatabaseGDELT = 'gkg') %>%
      separate(
        col = stemData,
        into = c('dateData', 'nameFile', 'typeFile', 'isZipFile'),
        sep = '\\.'
      ) %>%
      dplyr::mutate(
        isZipFile = ifelse(isZipFile == "zip", T, F),
        # "gkgcounts" files hold the counts extract rather than the full gkg
        isCountFile = ifelse(nameFile == 'gkgcounts', T, F),
        dateData = dateData %>% lubridate::ymd() %>% as.Date()
      ) %>%
      dplyr::select(-c(nameFile, typeFile)) %>%
      dplyr::select(idHash,
                    dateData,
                    isZipFile,
                    isCountFile,
                    urlData,
                    everything()) %>%
      suppressWarnings() %>%
      suppressMessages()
    if (remove_count_files) {
      urlData <-
        urlData %>%
        dplyr::filter(isCountFile == F)
    }
    if (return_message) {
      count.files <-
        urlData %>%
        nrow
      min.date <-
        urlData$dateData %>% min(na.rm = T)
      max.date <-
        urlData$dateData %>% max(na.rm = T)
      "You got " %>%
        paste0(count.files,
               ' GDELT Global Knowledge Graph URLS from ',
               min.date,
               ' to ',
               max.date) %>%
        cat(fill = T)
    }
    return(urlData)
  }
#' Retrieves most recent GDELT Global Content Analysis Measures (GCAM) code book
#'
#' Downloads the GCAM master codebook and returns it with friendly
#' column names.
#'
#' @return a \code{tibble} with one row per GCAM variable
#' @export
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @examples
#' get_codes_gcam()
get_codes_gcam <- function() {
  codebook_url <-
    'http://data.gdeltproject.org/documentation/GCAM-MASTER-CODEBOOK.TXT'
  # read the raw tab-separated codebook, silencing parser chatter
  codebook <-
    suppressMessages(suppressWarnings(read_tsv(codebook_url)))
  names(codebook) <-
    c(
      'idGCAM',
      'idDictionary',
      'idDimension',
      'typeDictionary',
      'codeLanguage',
      'dictionaryHumanName',
      'dimensionHumanName',
      'dictionaryCitation'
    )
  codebook
}
#' Retrieves GDELT CAMEO religion code book
#'
#' Downloads the CAMEO religion lookup table and returns it with
#' friendly column names.
#'
#' @return a \code{tibble} with one row per CAMEO religion code
#' @export
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @examples
#' get_codes_cameo_religion()
get_codes_cameo_religion <- function() {
  codebook_url <-
    'http://gdeltproject.org/data/lookups/CAMEO.religion.txt'
  # read the lookup table, silencing parser chatter
  codebook <-
    suppressMessages(suppressWarnings(read_tsv(codebook_url)))
  names(codebook) <- c('codeCAMEOReligion', 'nameCAMEOReligion')
  codebook
}
#' Retrieves GDELT CAMEO country code book
#'
#' Downloads the CAMEO country lookup table and returns it with
#' friendly column names.
#'
#' @return a \code{tibble} with one row per country code
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @export
#'
#' @examples
#' get_codes_cameo_country()
get_codes_cameo_country <- function() {
  codebook_url <-
    'http://gdeltproject.org/data/lookups/CAMEO.country.txt'
  # read the lookup table, silencing parser chatter
  codebook <-
    suppressMessages(suppressWarnings(read_tsv(codebook_url)))
  names(codebook) <- c('codeISO', 'nameCountry')
  codebook
}
#' Retrieves GDELT CAMEO type code book
#'
#' Downloads the CAMEO actor-type lookup table and returns it with
#' friendly column names.
#'
#' @return a \code{tibble} with one row per CAMEO actor type
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @export
#'
#' @examples
#' get_codes_cameo_type()
get_codes_cameo_type <- function() {
  codebook_url <- 'http://gdeltproject.org/data/lookups/CAMEO.type.txt'
  # read the lookup table, silencing parser chatter
  codebook <-
    suppressMessages(suppressWarnings(read_tsv(codebook_url)))
  names(codebook) <- c('codeCAMEOType', 'nameCAMEOType')
  codebook
}
#' Retrieves CAMEO event code book
#'
#' Downloads the CAMEO event-code lookup table and derives the two-digit
#' parent code for each event code.
#'
#' @return a \code{tibble} with one row per CAMEO event code, including
#'   its parent code and a flag for whether the code is itself a parent
#' @export
#' @import dplyr readr stringr purrr tidyr
#' @examples
#' get_codes_cameo_events()
get_codes_cameo_events <- function() {
  url <-
    'http://gdeltproject.org/data/lookups/CAMEO.eventcodes.txt'
  code_df <-
    url %>%
    read_tsv() %>%
    suppressWarnings() %>%
    suppressMessages()
  names(code_df) <-
    c('idCAMEOEvent', 'descriptionCAMEOEvent')
  code_df <-
    code_df %>%
    dplyr::mutate(
      # two-character codes are the top-level ("parent") CAMEO categories
      isParentCode = ifelse(idCAMEOEvent %>% nchar() == 2, T, F),
      idParentCode = idCAMEOEvent %>% substr(1, 2)
    ) %>%
    dplyr::select(idParentCode, everything()) %>%
    # .vars/.funs form replaces the deprecated dplyr::funs()
    mutate_at(.vars = c("idParentCode", "idCAMEOEvent"),
              .funs = as.numeric)
  return(code_df)
}
#' Retrieves GDELT CAMEO known group code book
#'
#' Downloads the CAMEO known-group lookup table and returns it with
#' friendly column names.
#'
#' @return a \code{tibble} with one row per known group
#' @importFrom readr read_tsv
#' @importFrom magrittr %>%
#' @export
#' @examples
#' get_codes_cameo_known_groups()
get_codes_cameo_known_groups <- function() {
  codebook_url <-
    'http://gdeltproject.org/data/lookups/CAMEO.knowngroup.txt'
  # read the lookup table, silencing parser chatter
  codebook <-
    suppressMessages(suppressWarnings(read_tsv(codebook_url)))
  names(codebook) <- c('codeCAMEOGroup', 'nameCAMEOGroup')
  return(codebook)
}
#' Retrieves GDELT CAMEO ethnic code book
#'
#' Downloads the CAMEO ethnicity lookup table, renames the columns,
#' and upper-cases the ethnicity codes.
#'
#' @return a \code{tibble} with one row per ethnicity code
#' @export
#' @import dplyr readr stringr purrr tidyr
#' @examples
#' get_codes_cameo_ethnic()
get_codes_cameo_ethnic <- function() {
  codebook_url <-
    'http://gdeltproject.org/data/lookups/CAMEO.ethnic.txt'
  codebook <-
    codebook_url %>%
    read_tsv() %>%
    suppressWarnings() %>%
    suppressMessages() %>%
    set_names(c('codeCAMEOEthnicity', 'nameCAMEOEthnicity'))
  # normalize codes to upper case
  codebook <-
    codebook %>%
    mutate(codeCAMEOEthnicity = str_to_upper(codeCAMEOEthnicity))
  return(codebook)
}
#' Retrieves GKG theme code book
#'
#' Downloads the GKG theme lookup, flags common theme families, and
#' parses World Bank (WB) themes into their numeric code and name.
#'
#' @return a \code{tibble} of GKG theme codes with event counts,
#'   category flags, and parsed WB code metadata
#' @export
#' @import dplyr readr stringr purrr tidyr
#' @examples
#' get_codes_gkg_themes()
get_codes_gkg_themes <- function() {
  url <-
    'http://data.gdeltproject.org/api/v2/guides/LOOKUP-GKGTHEMES.TXT'
  code_df <-
    url %>%
    read_tsv(col_names = F) %>%
    setNames(c("codeGKGTheme", "countEvents"))
  code_df <-
    code_df %>%
    mutate(
      code2 = codeGKGTheme %>% str_to_lower(),
      isWBCode = ifelse(code2 %>% str_detect("wb_"), T, F),
      isEconomicEvent = ifelse(code2 %>% str_detect("econ_"), T, F),
      # NOTE: isSocialEvent was previously assigned twice ("soc" then
      # "soc_"); the stricter "soc_" match was the effective one and is
      # the only one kept here
      isSocialEvent = ifelse(code2 %>% str_detect("soc_"), T, F),
      isTaxEvent = ifelse(code2 %>% str_detect("tax_"), T, F),
      isMilitaryEvent = ifelse(code2 %>% str_detect("military|mil_"), T, F),
      isGovernmentEvent = ifelse(code2 %>% str_detect("gov_|government"), T, F),
      isMedicalEvent = ifelse(code2 %>% str_detect("med_|medical"), T, F),
      isAgressionAct = ifelse(code2 %>% str_detect("act_"), T, F),
      isMediaEvent = ifelse(code2 %>% str_detect("media_|_media"), T, F),
      isEmergencyEvent = ifelse(code2 %>% str_detect("emerg_"), T, F),
      isMovement = ifelse(code2 %>% str_detect("movement_"), T, F),
      isCriminalEvent = ifelse(code2 %>% str_detect("crime|crm_"), T, F)
    ) %>%
    dplyr::select(-code2)
  # WB themes look like "WB_678_DIGITAL_GOVERNMENT"; split out the numeric
  # id and the human-readable name
  wb_codes <-
    code_df %>%
    dplyr::filter(isWBCode)
  wb_codes <-
    wb_codes %>%
    mutate(codeGKGTheme = codeGKGTheme %>% sub('\\_', '\\.', .)) %>%
    separate(
      codeGKGTheme,
      into = c('idDictionary', 'nameWBCode'),
      remove = F,
      sep = '\\.'
    ) %>%
    mutate(nameWBCode = nameWBCode %>% sub('\\_', '\\.', .)) %>%
    separate(
      nameWBCode,
      into = c('idWBCode', 'nameWBCode'),
      remove = T,
      sep = '\\.'
    ) %>%
    mutate(
      idWBCode = idWBCode %>% as.numeric,
      nameWBCode = nameWBCode %>% str_replace_all('\\_', ' ') %>% str_to_lower
    ) %>%
    dplyr::select(-idDictionary)
  non_wb <-
    code_df %>%
    dplyr::filter(!isWBCode)
  code_df <-
    non_wb %>%
    bind_rows(wb_codes)
  code_df <-
    code_df %>%
    # restore the underscore form, e.g. "WB.678..." -> "WB_678...".
    # Anchored and escaped: the old pattern 'WB.' treated '.' as a regex
    # wildcard and could mangle any "WB"+character sequence anywhere
    mutate(codeGKGTheme = codeGKGTheme %>% str_replace_all('^WB\\.', 'WB_')) %>%
    dplyr::select(isWBCode, codeGKGTheme, idWBCode, nameWBCode, everything())
  code_df
}
#' Retrieves GDELT event summary by period
#'
#' @param period can be \code{c("yearly", "daily", "monthly")}
#' @param by_country is data by country
#' can be \code{c(TRUE, FALSE)}
#' @param return_message returns message
#' @importFrom readr read_csv
#' @return a \code{tibble} of event counts per period (and per country
#'   when \code{by_country} is \code{TRUE})
#' @export
#'
#' @examples
#' get_data_gdelt_period_event_totals(period = 'monthly', by_country = T)
get_data_gdelt_period_event_totals <-
  function(period = 'yearly',
           by_country = T,
           return_message = T) {
    periods <-
      c('daily', 'monthly', 'yearly')
    # fail fast on an unsupported period
    if (!period %in% periods) {
      "Sorry periods can only be:\n" %>%
        stop(paste0(paste0(periods, collapse = '\n')))
    }
    # normfiles are named "<period>.csv" or "<period>_country.csv"
    if (by_country) {
      period_slug <-
        period %>%
        paste0('_country.csv')
    } else {
      period_slug <-
        period %>%
        paste0('.csv')
    }
    base <-
      'http://data.gdeltproject.org/normfiles/'
    url_data <-
      base %>%
      paste0(period_slug)
    period_data <-
      url_data %>%
      read_csv(col_names = F) %>%
      suppressWarnings() %>%
      suppressMessages()
    # country files carry an extra country-id column
    if (by_country) {
      names(period_data) <-
        c('idDate', 'idCountry', 'countEvents')
    } else {
      names(period_data) <-
        c('idDate', 'countEvents')
    }
    period_data <-
      period_data %>%
      mutate(periodData = period, isByCountry = by_country)
    # add period-appropriate date columns up front
    if (period == 'daily') {
      period_data <-
        period_data %>%
        mutate(dateData = idDate %>% lubridate::ymd() %>% as.Date()) %>%
        dplyr::select(periodData, isByCountry, dateData, everything())
    }
    if (period == 'monthly') {
      period_data <-
        period_data %>%
        mutate(yearMonth = idDate) %>%
        dplyr::select(periodData, isByCountry, yearMonth, everything()) %>%
        mutate(
          # idDate is YYYYMM for monthly files
          yearData = yearMonth %>% substr(1, 4) %>% as.numeric(),
          monthData = yearMonth %>% substr(5, 6) %>% as.numeric()
        ) %>%
        select(periodData, isByCountry, yearData, monthData, everything())
    }
    if (period == 'yearly') {
      period_data <-
        period_data %>%
        mutate(yearData = idDate) %>%
        dplyr::select(periodData, isByCountry, everything())
    }
    if (return_message) {
      from_date <-
        period_data$idDate %>% min()
      to_date <-
        period_data$idDate %>% max()
      total_events <-
        period_data$countEvents %>% sum() / 1000000
      events_slug <-
        total_events %>% paste0(" million GDELT events from ")
      "There have been " %>%
        paste0(events_slug, from_date,
               ' to ', to_date) %>%
        message
    }
    return(period_data)
  }
#' Retrieves GDELT event database schema
#'
#' Maps the raw GDELT Events column names (\code{nameGDELT}) to this
#' package's friendly names (\code{nameActual}); used when renaming
#' downloaded event files.
#'
#' @return a two-column \code{tibble} with \code{nameGDELT} and
#'   \code{nameActual}, one row per GDELT Events column
#' @importFrom dplyr tibble
#' @examples
#' get_schema_gdelt_events
get_schema_gdelt_events <- function() {
  gdelt_events_schema <-
    tibble(
      nameGDELT = c(
        "GLOBALEVENTID",
        "SQLDATE",
        "MonthYear",
        "Year",
        "FractionDate",
        "Actor1Code",
        "Actor1Name",
        "Actor1CountryCode",
        "Actor1KnownGroupCode",
        "Actor1EthnicCode",
        "Actor1Religion1Code",
        "Actor1Religion2Code",
        "Actor1Type1Code",
        "Actor1Type2Code",
        "Actor1Type3Code",
        "Actor2Code",
        "Actor2Name",
        "Actor2CountryCode",
        "Actor2KnownGroupCode",
        "Actor2EthnicCode",
        "Actor2Religion1Code",
        "Actor2Religion2Code",
        "Actor2Type1Code",
        "Actor2Type2Code",
        "Actor2Type3Code",
        "IsRootEvent",
        "EventCode",
        "EventBaseCode",
        "EventRootCode",
        "QuadClass",
        "GoldsteinScale",
        "NumMentions",
        "NumSources",
        "NumArticles",
        "AvgTone",
        "Actor1Geo_Type",
        "Actor1Geo_FullName",
        "Actor1Geo_CountryCode",
        "Actor1Geo_ADM1Code",
        "Actor1Geo_ADM2Code",
        "Actor1Geo_Lat",
        "Actor1Geo_Long",
        "Actor1Geo_FeatureID",
        "Actor2Geo_Type",
        "Actor2Geo_FullName",
        "Actor2Geo_CountryCode",
        "Actor2Geo_ADM1Code",
        "Actor2Geo_ADM2Code",
        "Actor2Geo_Lat",
        "Actor2Geo_Long",
        "Actor2Geo_FeatureID",
        "ActionGeo_Type",
        "ActionGeo_FullName",
        "ActionGeo_CountryCode",
        "ActionGeo_ADM1Code",
        "ActionGeo_ADM2Code",
        "ActionGeo_Lat",
        "ActionGeo_Long",
        "ActionGeo_FeatureID",
        "DATEADDED",
        "SOURCEURL"
      ),
      nameActual = c(
        "idGlobalEvent",
        "dateEvent",
        "monthYearEvent",
        "yearEvent",
        "dateFraction",
        "codeActor1",
        "nameActor1",
        "codeISOActor1",
        "codeCAMEOGroupActor1",
        "codeCAMEOEthnicityActor1",
        "codeCAMEOReligionActor1",
        "codeCAMEOReligion2Actor1",
        "codeCAMEOTypeActor1",
        "codeCAMEOType2Actor1",
        "codeCAMEOType3Actor1",
        "codeActor2",
        "nameActor2",
        "codeISOActor2",
        "codeCAMEOGroupActor2",
        "codeCAMEOEthnicityActor2",
        "codeCAMEOReligionActor2",
        "codeCAMEOReligion2Actor2",
        "codeCAMEOTypeActor2",
        "codeCAMEOType2Actor2",
        # NOTE(review): "codeCAMEOType3Actor.3" looks like a typo for
        # "codeCAMEOType3Actor2" but is used consistently elsewhere in
        # this file, so it is preserved for compatibility
        "codeCAMEOType3Actor.3",
        "isRootEvent",
        "idCAMEOEvent",
        "idCAMEOEventBase",
        "idCAMEOEventRoot",
        "classQuad",
        "scoreGoldstein",
        "countMentions",
        "countSources",
        "countArticles",
        "avgTone",
        "idTypeLocationActor1",
        "locationActor1",
        "idCountryActor1",
        "idADM1CodeActor1",
        "idADM2CodeActor1",
        "latitudeActor1",
        "longitudeActor1",
        "idFeatureActor1",
        "idTypeLocationActor2",
        "locationActor2",
        "idCountryActor2",
        "idADM1CodeActor2",
        "idADM2CodeActor2",
        "latitudeActor2",
        "longitudeActor2",
        "idFeatureActor2",
        "idTypeLocationAction",
        "locationAction",
        "idCountryAction",
        "idADM1CodeAction",
        "idADM2CodeAction",
        "latitudeAction",
        "longitudeAction",
        "idFeatureAction",
        "dateTimeDataAdded",
        "urlSource"
      )
    )
  return(gdelt_events_schema)
}
#' Gets gkg general schema
#'
#' Maps raw GKG column names (\code{nameGDELT}) to this package's
#' friendly names (\code{nameActual}). The first 27 rows cover the
#' v2.1 GKG columns (rows 1:27 are used when naming 27-column files);
#' the remaining rows cover the legacy upper-case column names.
#'
#' @return a two-column \code{tibble} with \code{nameGDELT} and
#'   \code{nameActual}
#'
#' @examples
#' get_schema_gkg_general()
get_schema_gkg_general <- function() {
  schema_df <-
    tibble(
      nameGDELT = c(
        "GKGRECORDID",
        "DATE",
        "SourceCollectionIdentifier",
        "SourceCommonName",
        "DocumentIdentifier",
        "Counts",
        "V2Counts",
        "Themes",
        "V2Themes",
        "Locations",
        "V2Locations",
        "Persons",
        "V2Persons",
        "Organizations",
        "V2Organizations",
        "V2Tone",
        "Dates",
        "GCAM",
        "SharingImage",
        "RelatedImages",
        "SocialImageEmbeds",
        "SocialVideoEmbeds",
        "Quotations",
        "AllNames",
        "Amounts",
        "TranslationInfo",
        "Extras",
        "NUMARTS",
        "COUNTS",
        "THEMES",
        "LOCATIONS",
        "PERSONS",
        "ORGANIZATIONS",
        "TONE",
        "CAMEOEVENTIDS",
        "SOURCES",
        "SOURCEURLS"
      ),
      nameActual = c(
        "idGKG",
        "dateDocument",
        "idSourceCollectionIdentifier",
        "nameSource",
        "documentSource",
        "counts",
        "countsCharLoc",
        "themes",
        "themesCharLoc",
        "locations",
        "locationsCharLoc",
        "persons",
        "personsCharLoc",
        "organizations",
        "organizationsCharLoc",
        "tone",
        "dates",
        "gcam",
        "urlImage",
        "urlImageRelated",
        "urlSocialMediaImageEmbeds",
        "urlSocialMediaVideoEmbeds",
        "quotations",
        "mentionedNamesCounts",
        "mentionedNumericsCounts",
        "translationInfo",
        "xmlExtras",
        "countArticles",
        # the remaining friendly names intentionally repeat the v2 names
        # above; they are only used for legacy (upper-case) columns
        "counts",
        "themes",
        "locations",
        "persons",
        "organizations",
        "tone",
        "idCAMEOEvents",
        "sources",
        "urlSources"
      )
    )
  return(schema_df)
}
#' Gets gkg count schema
#'
#' Maps raw GKG counts-file column names (\code{nameGDELT}) to this
#' package's friendly names (\code{nameActual}); used when renaming
#' downloaded "gkgcounts" files.
#'
#' @return a two-column \code{tibble} with \code{nameGDELT} and
#'   \code{nameActual}, one row per counts-file column
#'
#' @examples
#' get_schema_gkg_counts()
get_schema_gkg_counts <- function() {
  counts_schema <-
    tibble(
      nameGDELT = c(
        "DATE",
        "NUMARTS",
        "COUNTTYPE",
        "NUMBER",
        "OBJECTTYPE",
        "GEO_TYPE",
        "GEO_FULLNAME",
        "GEO_COUNTRYCODE",
        "GEO_ADM1CODE",
        "GEO_LAT",
        "GEO_LONG",
        "GEO_FEATUREID",
        "CAMEOEVENTIDS",
        "SOURCES",
        "SOURCEURLS"
      ),
      nameActual = c(
        "dateEvent",
        "countArticles",
        "typeEvent",
        "countObject",
        "typeObject",
        "idTypeLocation",
        "location",
        "idCountry",
        "idADM1CodeAction",
        "latitude",
        "longitude",
        "idFeature",
        "idCAMEOEvents",
        "sources",
        "urlSources"
      )
    )
  return(counts_schema)
}
#' Gets gkg mention schema
#'
#' Maps raw GDELT Mentions-file column names (\code{nameGDELT}) to this
#' package's friendly names (\code{nameActual}); used when renaming
#' downloaded mentions files.
#'
#' @return a two-column \code{tibble} with \code{nameGDELT} and
#'   \code{nameActual}, one row per mentions-file column
#'
#' @examples
#' get_schema_gkg_mentions()
get_schema_gkg_mentions <- function() {
  mentions_schema <-
    tibble(
      nameGDELT =
        c(
          "GLOBALEVENTID",
          "EventTimeDate",
          "MentionTimeDate",
          "MentionType",
          "MentionSourceName",
          "MentionIdentifier",
          "SentenceID",
          "Actor1CharOffset",
          "Actor2CharOffset",
          "ActionCharOffset",
          "InRawText",
          "Confidence",
          "MentionDocLen",
          "MentionDocTone",
          "MentionDocTranslationInfo",
          "Extras"
        ),
      nameActual =
        c(
          "idGlobalEvent",
          "dateEvent",
          "dateMention",
          "idMentionType",
          "nameSource",
          "documentSource",
          "idSentence",
          "charLocActor1",
          "charLocActor2",
          "charLocAction",
          "isRawText",
          "scoreGoldsteinConfidence",
          "lengthMentionedDocument",
          "toneMentionedDocument",
          "translationMentionedDocument",
          "extra"
        )
    )
  return(mentions_schema)
}
#' Get GDELT url data
#'
#' @param url
#' @param file_directory
#' @param folder_name
#' @param remove_existing_folder
#' @param remove_files
#' @param empty_trash
#' @param return_message
#'
#' @return
#' @export
#' @import readr purrr curl dplyr lubridate tidyr stringr
#' @importFrom urltools domain
#' @examples
get_gdelt_url_data <-
function(url = "http://data.gdeltproject.org/gdeltv2/20160531000000.gkg.csv.zip",
file_directory = NULL,
folder_name = 'gdelt_data',
remove_existing_folder = T,
remove_files = T,
empty_trash = T,
return_message = T) {
use_tmp_file <-
file_directory %>%
is_null()
if (use_tmp_file) {
tmp <-
tempfile()
url %>%
curl_download(url = ., tmp)
con <-
unzip(tmp)
gdelt_cols <-
con %>%
read_tsv(col_names = F,
n_max = 1) %>% ncol() %>%
suppressMessages() %>%
suppressWarnings()
if (gdelt_cols == 16) {
gdelt_data <-
con %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
get_schema_gkg_mentions() %>% .$nameActual
gdelt_data <-
gdelt_data %>%
mutate(
dateTimeEvent = dateEvent %>% ymd_hms() %>% lubridate::with_tz(Sys.timezone()),
dateEvent = dateTimeEvent %>% as.Date(),
dateTimeMention = dateMention %>% ymd_hms() %>% lubridate::with_tz(Sys.timezone()),
dateMention = dateTimeMention %>% as.Date()
) %>%
dplyr::select(idGlobalEvent,
dateTimeEvent,
dateTimeMention,
everything()) %>%
dplyr::left_join(tibble(
idMentionType = 1:6,
typeMention = c('web', 'citation', 'core', 'dtic', 'jstor', 'nontext')
)) %>%
dplyr::select(idGlobalEvent:idMentionType,
typeMention,
everything()) %>%
suppressMessages()
gdelt_data <-
gdelt_data %>%
mutate_at(gdelt_data %>% dplyr::select(dplyr::matches("is")) %>% names(),
funs(. %>% as.logical()))
}
if (gdelt_cols == 15) {
gdelt_data <-
con %>%
read_tsv(col_names = T) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
get_schema_gkg_counts() %>% .$nameActual
gdelt_data <-
gdelt_data %>%
dplyr::left_join(tibble(
idTypeLocation = 1:5,
typeLocation = c(
'country',
'usState',
'usCity',
'worldCity',
'worldState'
)
)) %>%
suppressMessages() %>%
dplyr::mutate(
idRecord = 1:n(),
idGKG = dateEvent %>% paste0('.', idRecord),
urlSources = urlSources %>% str_replace_all("<UDIV>", ';')
) %>%
dplyr::mutate(dateEvent = dateEvent %>% ymd()) %>%
dplyr::select(dateEvent:idTypeLocation, typeLocation, everything()) %>%
dplyr::select(idRecord, idGKG, everything())
}
if (gdelt_cols == 61) {
gdelt_data <-
con %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
get_schema_gdelt_events() %>% .$nameActual
gdelt_data <-
gdelt_data %>%
dplyr::rename(dateTimeDocument = dateTimeDataAdded) %>%
dplyr::mutate(
dateEvent = dateEvent %>% lubridate::ymd,
dateTimeDocument = dateTimeDocument %>% ymd_hms() %>% lubridate::with_tz(Sys.timezone()),
nameSource = urlSource %>% urltools::domain() %>% str_replace_all("www.", '')
)
gdelt_data <-
gdelt_data %>%
mutate_at(gdelt_data %>% dplyr::select(dplyr::matches("is")) %>% names(),
funs(. %>% as.logical()))
gdelt_data <-
gdelt_data %>%
dplyr::left_join(tibble(
classQuad = 1:4,
nameQuad = c(
'Verbal Cooperation',
'Material Cooperation',
'Verbal Conflict',
'Material Conflict'
)
)) %>%
suppressMessages()
}
if (gdelt_cols == 57) {
gdelt_data <-
con %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
c(
"idGlobalEvent",
"dateEvent",
"monthYearEvent",
"yearEvent",
"dateFraction",
"codeActor1",
"nameActor1",
"codeISOActor1",
"codeCAMEOGroupActor1",
"codeCAMEOEthnicityActor1",
"codeCAMEOReligionActor1",
"codeCAMEOReligion2Actor1",
"codeCAMEOTypeActor1",
"codeCAMEOType2Actor1",
"codeCAMEOType3Actor1",
"codeActor2",
"nameActor2",
"codeISOActor2",
"codeCAMEOGroupActor2",
"codeCAMEOEthnicityActor2",
"codeCAMEOReligionActor2",
"codeCAMEOReligion2Actor2",
"codeCAMEOTypeActor2",
"codeCAMEOType2Actor2",
"codeCAMEOType3Actor.3",
"isRootEvent",
"idCAMEOEvent",
"idCAMEOEventBase",
"idCAMEOEventRoot",
"classQuad",
"scoreGoldstein",
"countMentions",
"countSources",
"countArticles",
"avgTone",
"idTypeLocationActor1",
"locationActor1",
"idCountryActor1",
"idADM1CodeActor1",
"latitudeActor1",
"longitudeActor1",
"idFeatureActor1",
"idTypeLocationActor2",
"locationActor2",
"idCountryActor2",
"idADM1CodeActor2",
"latitudeActor2",
"longitudeActor2",
"idFeatureActor2",
"idTypeLocationAction",
"locationAction",
"idCountryAction",
"idADM1CodeAction",
"latitudeAction",
"longitudeAction",
"idFeatureAction",
"dateAdded"
)
gdelt_data <-
gdelt_data %>%
dplyr::rename(dateDocument = dateAdded) %>%
dplyr::mutate(
dateEvent = dateEvent %>% lubridate::ymd,
dateDocument = dateDocument %>% lubridate::ymd
) %>%
suppressWarnings()
gdelt_data <-
gdelt_data %>%
mutate_at(gdelt_data %>% dplyr::select(dplyr::matches("is")) %>% names(),
funs(. %>% as.logical()))
gdelt_data <-
gdelt_data %>%
dplyr::left_join(tibble(
classQuad = 1:4,
nameQuad = c(
'Verbal Cooperation',
'Material Cooperation',
'Verbal Conflict',
'Material Conflict'
)
)) %>%
suppressMessages()
}
if (gdelt_cols == 58) {
gdelt_data <-
con %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
c(
"idGlobalEvent",
"dateEvent",
"monthYearEvent",
"yearEvent",
"dateFraction",
"codeActor1",
"nameActor1",
"codeISOActor1",
"codeCAMEOGroupActor1",
"codeCAMEOEthnicityActor1",
"codeCAMEOReligionActor1",
"codeCAMEOReligion2Actor1",
"codeCAMEOTypeActor1",
"codeCAMEOType2Actor1",
"codeCAMEOType3Actor1",
"codeActor2",
"nameActor2",
"codeISOActor2",
"codeCAMEOGroupActor2",
"codeCAMEOEthnicityActor2",
"codeCAMEOReligionActor2",
"codeCAMEOReligion2Actor2",
"codeCAMEOTypeActor2",
"codeCAMEOType2Actor2",
"codeCAMEOType3Actor.3",
"isRootEvent",
"idCAMEOEvent",
"idCAMEOEventBase",
"idCAMEOEventRoot",
"classQuad",
"scoreGoldstein",
"countMentions",
"countSources",
"countArticles",
"avgTone",
"idTypeLocationActor1",
"locationActor1",
"idCountryActor1",
"idADM1CodeActor1",
"latitudeActor1",
"longitudeActor1",
"idFeatureActor1",
"idTypeLocationActor2",
"locationActor2",
"idCountryActor2",
"idADM1CodeActor2",
"latitudeActor2",
"longitudeActor2",
"idFeatureActor2",
"idTypeLocationAction",
"locationAction",
"idCountryAction",
"idADM1CodeAction",
"latitudeAction",
"longitudeAction",
"idFeatureAction",
"dateAdded",
"urlSource"
)
gdelt_data <-
gdelt_data %>%
dplyr::rename(dateTimeDocument = dateAdded) %>%
dplyr::mutate(
dateEvent = dateEvent %>% lubridate::ymd(),
nameSource = urlSource %>% urltools::domain() %>% str_replace_all("www.", '')
) %>%
suppressWarnings()
gdelt_data <-
gdelt_data %>%
dplyr::mutate_at(gdelt_data %>% dplyr::select(dplyr::matches("is")) %>% names(),
funs(. %>% as.logical()))
gdelt_data <-
gdelt_data %>%
dplyr::left_join(tibble(
classQuad = 1:4,
nameQuad = c(
'Verbal Cooperation',
'Material Cooperation',
'Verbal Conflict',
'Material Conflict.'
)
)) %>%
suppressMessages()
}
if (gdelt_cols == 11) {
gdelt_data <-
con %>%
read_tsv(col_names = T) %>%
suppressWarnings() %>%
suppressMessages()
schema_df <-
get_schema_gkg_general()
names(gdelt_data) <-
schema_df$nameActual[names(gdelt_data) %>% match(schema_df$nameGDELT)]
names(gdelt_data)[1] <-
c('date')
gdelt_data <-
gdelt_data %>%
dplyr::mutate(
idRecord = 1:n(),
idGKG = date %>% paste0('.', idRecord),
date = date %>% lubridate::ymd(),
urlSources = urlSources %>% str_replace_all("<UDIV>", ';')
) %>%
dplyr::select(idRecord, idGKG, everything())
}
if (gdelt_cols == 27) {
gdelt_data <-
con %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
schema_df <-
get_schema_gkg_general()
names(gdelt_data) <-
schema_df$nameActual[1:27]
gdelt_data <-
gdelt_data %>%
dplyr::mutate(
idSourceCollectionIdentifier = idSourceCollectionIdentifier %>% as.numeric(),
isDocumentURL = ifelse(documentSource %>% str_detect('http'), T, F)
) %>%
dplyr::select(idGKG:idSourceCollectionIdentifier,
isDocumentURL,
everything()) %>%
dplyr::rename(dateTimeDocument = dateDocument) %>%
dplyr::mutate(
dateTimeDocument = dateTimeDocument %>% lubridate::ymd_hms() %>% lubridate::with_tz(Sys.timezone())
) %>%
separate(
idGKG,
into = c('dateTime', 'idDateTimeArticle'),
sep = '\\-',
remove = F
) %>%
dplyr::select(-c(dateTime, translationInfo)) %>%
dplyr::mutate(
idDateTimeArticle = idDateTimeArticle %>% as.numeric,
domainSource = if_else(
isDocumentURL == T,
documentSource %>% urltools::domain(),
nameSource
)
) %>%
dplyr::select(idGKG:documentSource, domainSource, everything()) %>%
suppressMessages() %>%
suppressWarnings()
}
con %>%
unlink()
} else {
only_folder <-
!folder_name %>% is_null() &
file_directory %>% is_null()
if (only_folder) {
file_directory <-
getwd()
}
file_directory <-
file_directory %>%
paste0('/', folder_name)
file <-
url %>% basename()
temp.dir <-
file_directory
file_path <-
temp.dir %>% str_split('/') %>% flatten_chr() %>% .[seq_along(.)] %>% paste0(collapse = '/')
if (remove_existing_folder) {
if (dir.exists(paths = file_path)) {
"rm -R " %>%
paste0(temp.dir) %>%
system()
if (empty_trash) {
system('rm -rf ~/.Trash/*')
}
}
}
if (!dir.exists(paths = file_path)) {
dir.create(temp.dir)
}
file <-
temp.dir %>%
paste0('/', file)
url %>%
curl_download(url = ., destfile = file)
file %>%
unzip(exdir = paste0(temp.dir, '/'))
dir_files <-
temp.dir %>%
list.files()
file_name <-
dir_files %>%
str_detect('CSV|csv|TXT|txt|XLS|XLSX|xlsx|xls') %>%
dir_files[.]
csv_file_loc <-
file_name[!file_name %>% str_detect("zip")] %>%
paste0(file_directory, '/', .)
gdelt_cols <-
csv_file_loc %>%
read_tsv(col_names = F,
n_max = 1) %>% ncol %>% suppressWarnings() %>%
as.character() %>%
parse_number() %>%
suppressMessages()
if (gdelt_cols == 16) {
gdelt_data <-
csv_file_loc %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
get_schema_gkg_mentions() %>% .$nameActual
gdelt_data <-
gdelt_data %>%
mutate(
dateTimeEvent = dateEvent %>% ymd_hms %>% with_tz(Sys.timezone()),
dateEvent = dateTimeEvent %>% as.Date(),
dateTimeMention = dateMention %>% ymd_hms %>% with_tz(Sys.timezone()),
dateMention = dateTimeMention %>% as.Date()
) %>%
dplyr::select(idGlobalEvent,
dateTimeEvent,
dateTimeMention,
everything()) %>%
dplyr::left_join(tibble(
idMentionType = 1:6,
mention_type = c('web', 'citation', 'core', 'dtic', 'jstor', 'nontext')
)) %>%
dplyr::select(idGlobalEvent:idMentionType,
mention_type,
everything()) %>%
suppressMessages()
gdelt_data <-
gdelt_data %>%
mutate_at(.vars = gdelt_data %>% dplyr::select(dplyr::matches("is")) %>% names(),
.funs = as.logical)
}
if (gdelt_cols == 15) {
gdelt_data <-
csv_file_loc %>%
read_tsv(col_names = T) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
get_schema_gkg_counts() %>% .$nameActual
gdelt_data <-
gdelt_data %>%
dplyr::left_join(tibble(
idTypeLocation = 1:5,
typeLocation = c(
'country',
'usState',
'usCity',
'worldCity',
'worldState'
)
)) %>%
suppressMessages() %>%
dplyr::mutate(
idRecord = 1:n(),
idGKG = dateEvent %>% paste0('.', idRecord),
urlSources = urlSources %>% str_replace_all("<UDIV>", ';')
) %>%
dplyr::mutate(dateEvent = dateEvent %>% ymd()) %>%
dplyr::select(dateEvent:idTypeLocation, typeLocation, everything()) %>%
dplyr::select(idRecord, idGKG, everything())
}
if (gdelt_cols == 61) {
gdelt_data <-
csv_file_loc %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
get_schema_gdelt_events() %>% .$nameActual
gdelt_data <-
gdelt_data %>%
dplyr::rename(dateTimeDocument = dateTimeDataAdded) %>%
dplyr::mutate(
dateEvent = dateEvent %>% lubridate::ymd,
dateTimeDocument = dateTimeDocument %>% ymd_hms() %>% with_tz(Sys.timezone()),
nameSource = urlSource %>% domain() %>% str_replace_all("www.", '')
)
gdelt_data <-
gdelt_data %>%
dplyr::mutate_at(gdelt_data %>% dplyr::select(dplyr::matches("is")) %>% names(),
funs(. %>% as.logical()))
gdelt_data <-
gdelt_data %>%
dplyr::left_join(tibble(
classQuad = 1:4,
nameQuad = c(
'Verbal Cooperation',
'Material Cooperation',
'Verbal Conflict',
'Material Conflict'
)
)) %>%
suppressMessages()
}
if (gdelt_cols == 57) {
gdelt_data <-
csv_file_loc %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
c(
"idGlobalEvent",
"dateEvent",
"monthYearEvent",
"yearEvent",
"dateFraction",
"codeActor1",
"nameActor1",
"codeISOActor1",
"codeCAMEOGroupActor1",
"codeCAMEOEthnicityActor1",
"codeCAMEOReligionActor1",
"codeCAMEOReligion2Actor1",
"codeCAMEOTypeActor1",
"codeCAMEOType2Actor1",
"codeCAMEOType3Actor1",
"codeActor2",
"nameActor2",
"codeISOActor2",
"codeCAMEOGroupActor2",
"codeCAMEOEthnicityActor2",
"codeCAMEOReligionActor2",
"codeCAMEOReligion2Actor2",
"codeCAMEOTypeActor2",
"codeCAMEOType2Actor2",
"codeCAMEOType3Actor.3",
"isRootEvent",
"idCAMEOEvent",
"idCAMEOEventBase",
"idCAMEOEventRoot",
"classQuad",
"scoreGoldstein",
"countMentions",
"countSources",
"countArticles",
"avgTone",
"idTypeLocationActor1",
"locationActor1",
"idCountryActor1",
"idADM1CodeActor1",
"latitudeActor1",
"longitudeActor1",
"idFeatureActor1",
"idTypeLocationActor2",
"locationActor2",
"idCountryActor2",
"idADM1CodeActor2",
"latitudeActor2",
"longitudeActor2",
"idFeatureActor2",
"idTypeLocationAction",
"locationAction",
"idCountryAction",
"idADM1CodeAction",
"latitudeAction",
"longitudeAction",
"idFeatureAction",
"dateAdded"
)
gdelt_data <-
gdelt_data %>%
dplyr::rename(dateDocument = dateAdded) %>%
dplyr::mutate(
dateEvent = dateEvent %>% lubridate::ymd,
dateDocument = dateDocument %>% lubridate::ymd
) %>%
suppressWarnings()
gdelt_data <-
gdelt_data %>%
dplyr::mutate_at(gdelt_data %>% dplyr::select(dplyr::matches("is")) %>% names(),
funs(. %>% as.logical()))
gdelt_data <-
gdelt_data %>%
dplyr::left_join(tibble(
classQuad = 1:4,
nameQuad = c(
'Verbal Cooperation',
'Material Cooperation',
'Verbal Conflict',
'Material Conflict'
)
)) %>%
suppressMessages()
}
if (gdelt_cols == 58) {
load_needed_packages(c('urltools'))
gdelt_data <-
csv_file_loc %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
names(gdelt_data) <-
c(
"idGlobalEvent",
"dateEvent",
"monthYearEvent",
"yearEvent",
"dateFraction",
"codeActor1",
"nameActor1",
"codeISOActor1",
"codeCAMEOGroupActor1",
"codeCAMEOEthnicityActor1",
"codeCAMEOReligionActor1",
"codeCAMEOReligion2Actor1",
"codeCAMEOTypeActor1",
"codeCAMEOType2Actor1",
"codeCAMEOType3Actor1",
"codeActor2",
"nameActor2",
"codeISOActor2",
"codeCAMEOGroupActor2",
"codeCAMEOEthnicityActor2",
"codeCAMEOReligionActor2",
"codeCAMEOReligion2Actor2",
"codeCAMEOTypeActor2",
"codeCAMEOType2Actor2",
"codeCAMEOType3Actor.3",
"isRootEvent",
"idCAMEOEvent",
"idCAMEOEventBase",
"idCAMEOEventRoot",
"classQuad",
"scoreGoldstein",
"countMentions",
"countSources",
"countArticles",
"avgTone",
"idTypeLocationActor1",
"locationActor1",
"idCountryActor1",
"idADM1CodeActor1",
"latitudeActor1",
"longitudeActor1",
"idFeatureActor1",
"idTypeLocationActor2",
"locationActor2",
"idCountryActor2",
"idADM1CodeActor2",
"latitudeActor2",
"longitudeActor2",
"idFeatureActor2",
"idTypeLocationAction",
"locationAction",
"idCountryAction",
"idADM1CodeAction",
"latitudeAction",
"longitudeAction",
"idFeatureAction",
"dateAdded",
"urlSource"
)
gdelt_data <-
gdelt_data %>%
dplyr::rename(dateTimeDocument = dateAdded) %>%
dplyr::mutate(
dateEvent = dateEvent %>% lubridate::ymd,
dateTimeDocument %>% lubridate::ymd_hms() %>% with_tz(Sys.timezone()),
dateDocument = dateTimeDocument %>% as.Date(),
nameSource = urlSource %>% domain() %>% str_replace_all("www.", '')
) %>%
suppressWarnings()
gdelt_data <-
gdelt_data %>%
dplyr::mutate_at(gdelt_data %>% dplyr::select(dplyr::matches("is")) %>% names(),
funs(. %>% as.logical()))
gdelt_data <-
gdelt_data %>%
dplyr::left_join(tibble(
classQuad = 1:4,
nameQuad = c(
'Verbal Cooperation',
'Material Cooperation',
'Verbal Conflict',
'Material Conflict.'
)
)) %>%
suppressMessages()
}
if (gdelt_cols == 11) {
gdelt_data <-
csv_file_loc %>%
read_tsv(col_names = T) %>%
suppressWarnings() %>%
suppressMessages()
schema_df <-
get_schema_gkg_general()
names(gdelt_data) <-
schema_df$nameActual[names(gdelt_data) %>% match(schema_df$nameGDELT)]
names(gdelt_data)[1] <-
c('date')
gdelt_data <-
gdelt_data %>%
dplyr::mutate(
idRecord = 1:n(),
idGKG = date %>% paste0('.', idRecord),
date = date %>% lubridate::ymd(),
urlSources = urlSources %>% str_replace_all("<UDIV>", ';')
) %>%
dplyr::select(idRecord, idGKG, everything())
}
if (gdelt_cols == 27) {
gdelt_data <-
csv_file_loc %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
schema_df <-
get_schema_gkg_general()
names(gdelt_data) <-
schema_df$nameActual[1:27]
gdelt_data <-
gdelt_data %>%
dplyr::mutate(
idSourceCollectionIdentifier = idSourceCollectionIdentifier %>% as.numeric(),
isDocumentURL = ifelse(documentSource %>% str_detect('http'), T, F)
) %>%
dplyr::select(idGKG:idSourceCollectionIdentifier,
isDocumentURL,
everything()) %>%
dplyr::rename(dateTimeDocument = dateDocument) %>%
dplyr::mutate(dateTimeDocument = dateTimeDocument %>% lubridate::ymd_hms() %>% with_tz(Sys.timezone())) %>%
separate(
idGKG,
into = c('dateTime', 'idDateTimeArticle'),
sep = '\\-',
remove = F
) %>%
dplyr::select(-c(dateTime, translationInfo)) %>%
dplyr::mutate(
idDateTimeArticle = idDateTimeArticle %>% as.numeric,
domainSource = if_else(
isDocumentURL == T,
documentSource %>% urltools::domain,
nameSource
)
) %>%
dplyr::select(idGKG:documentSource, domainSource, everything()) %>%
suppressMessages() %>%
suppressWarnings()
}
if (remove_files) {
"rm -R " %>%
paste0(temp.dir) %>%
system()
if (empty_trash) {
system('rm -rf ~/.Trash/*')
}
}
}
if ('idADM1CodeActor1' %in% names(gdelt_data)) {
gdelt_data <-
gdelt_data %>%
mutate(idADM1CodeActor1 = idADM1CodeActor1 %>% as.character())
}
if ('idADM1CodeActor2' %in% names(gdelt_data)) {
gdelt_data <-
gdelt_data %>%
mutate(idADM1CodeActor2 = idADM1CodeActor2 %>% as.character())
}
if ('idADM2CodeActor1' %in% names(gdelt_data)) {
gdelt_data <-
gdelt_data %>%
mutate(idADM2CodeActor1 = idADM2CodeActor1 %>% as.character())
}
if ('idADM2CodeActor2' %in% names(gdelt_data)) {
gdelt_data <-
gdelt_data %>%
mutate(idADM2CodeActor2 = idADM2CodeActor2 %>% as.character())
}
if ('idADM1CodeAction' %in% names(gdelt_data)) {
gdelt_data <-
gdelt_data %>%
mutate(idADM1CodeAction = idADM1CodeAction %>% as.character())
}
if ('idADM2CodeAction' %in% names(gdelt_data)) {
gdelt_data <-
gdelt_data %>%
mutate(idADM2CodeAction = idADM2CodeAction %>% as.character())
}
if ('idCAMEOEvent' %in% names(gdelt_data)) {
gdelt_data <-
gdelt_data %>%
mutate_at(
.vars = gdelt_data %>% dplyr::select(dplyr::matches('idCAMEOEvent')) %>% names(),
funs(. %>% as.character() %>% as.numeric())
)
}
if ('idFeatureActor' %in% names(gdelt_data)) {
gdelt_data <-
gdelt_data %>%
mutate_at(.vars = gdelt_data %>% dplyr::select(dplyr::matches('idFeatureActor')) %>% names(),
.funs = as.character)
}
if (return_message) {
"Downloaded, parsed and imported " %>%
paste0(url) %>%
cat(fill = T)
}
return(gdelt_data)
}
#' Gets clean count data
#'
#' @param all_counts
#' @param count_col
#' @param return_wide
#'
#' @return
#'
#' @examples
# Standardizes a parsed GKG count table (output of the parse_gkg_* helpers)
# into either a long item/value table or a wide one-row-per-article table.
# `count_col` names the per-article item index produced by a parser; it is
# renamed to `count` and rebased to 0 so the first item keeps the bare column
# name and later items get a numeric suffix (item, item1, item2, ...).
# Rows are ordered by the numeric component split out of idGKG.
# `extra_key` optionally preserves one extra column through the reshape
# (NA = none). NOTE(review): relies on deprecated underscore verbs
# (rename_/select_) and superseded gather/spread; kept as-is for compatibility.
get_clean_count_data <-
function(all_counts,
extra_key = NA,
count_col = 'idArticle.tone',
return_wide = F) {
if (!extra_key %>% is.na()) {
# Extra key supplied: carry it through the reshape under the name `ek`.
clean_data <-
all_counts %>%
dplyr::rename_(count = count_col,
ek = extra_key) %>%
group_by(idGKG) %>%
# Re-number items within each article, then rebase to start at 0 so the
# first item's name carries no suffix after the paste0 below.
mutate(count = 1:n()) %>%
ungroup %>%
mutate(count = count - 1) %>%
gather(item, value, -c(idGKG, count, ek), na.rm = T) %>%
mutate(item = ifelse(count == 0, item, item %>% paste0(count)),
value = value %>% str_trim) %>%
# Split idGKG on '-' so rows can be ordered numerically by its second part.
separate(
idGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime, count) %>%
dplyr::select(-c(dateTime, GKG))
} else {
clean_data <-
all_counts %>%
dplyr::rename_(count = count_col) %>%
group_by(idGKG) %>%
mutate(count = 1:n()) %>%
ungroup %>%
mutate(count = count - 1)
if ('charLoc' %in% names(clean_data)) {
# charLoc present: keep it out of the gather so it survives long form.
clean_data <-
clean_data %>%
gather(item, value, -c(idGKG, count, charLoc), na.rm = T) %>%
mutate(
item = ifelse(count == 0, item, item %>% paste0(count)),
value = value %>% str_trim
) %>%
distinct() %>%
separate(
idGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime, count) %>%
dplyr::select(-c(dateTime, GKG))
} else {
clean_data <-
clean_data %>%
gather(item, value, -c(idGKG, count), na.rm = T) %>%
mutate(
item = ifelse(count == 0, item, item %>% paste0(count)),
value = value %>% str_trim
) %>%
distinct() %>%
separate(
idGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime, count) %>%
dplyr::select(-c(dateTime, GKG))
}
}
if (return_wide) {
# Wide output: record the intended column order (idGKG [, extra key],
# then items in first-seen order) before spreading.
if (!extra_key %>% is.na()) {
names_order <-
c('idGKG', 'ek', clean_data$item %>% unique)
} else {
names_order <-
c('idGKG', clean_data$item %>% unique)
}
clean_data <-
clean_data %>%
dplyr::select(-count) %>%
spread(item, value)
# Column-name keywords whose columns are coerced back to numeric
# (gather/spread forced every value to character).
keywords <-
c(
'amountValue',
'latitude',
'scoreGoldstein',
'articleWordCount',
'longitude',
'countEvent',
'idTypeLocation',
'charLoc',
'length',
'count',
'month',
'day',
'year',
'score'
)
length_nums <-
clean_data %>%
dplyr::select(dplyr::matches(keywords %>% paste0(collapse = '|'))) %>%
dplyr::select(-dplyr::matches('namePerson|idCountry')) %>%
names %>% length
if (length_nums > 0) {
numeric_vars <-
clean_data %>%
dplyr::select(dplyr::matches(keywords %>% paste0(collapse = '|'))) %>%
dplyr::select(-dplyr::matches('namePerson|idCountry')) %>%
names
clean_data <-
clean_data %>%
dplyr::mutate_at(numeric_vars,
funs(. %>% as.numeric()))
}
# Restore the recorded column order, then sort by the numeric idGKG part.
clean_data <-
clean_data %>%
dplyr::select_(.dots = names_order) %>%
separate(
idGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime) %>%
dplyr::select(-c(GKG, dateTime))
if (!extra_key %>% is.na()) {
# Give the extra-key column back its caller-supplied name.
names(clean_data)[2] <-
extra_key
}
} else {
clean_data <-
clean_data %>%
dplyr::select(-count) %>%
separate(
idGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime) %>%
dplyr::select(-c(GKG, dateTime))
}
return(clean_data)
}
#' Returns long or wide mentioned numerics from a GKG data frame
#'
#' @param gdelt_data
#' @param filter_na
#' @param include_char_loc whether to keep the \code{charLoc} character-location column
#' @param return_wide
#' @importFrom tidyr gather
#' @importFrom tidyr unite
#' @importFrom tidyr spread
#' @importFrom tidyr separate
#' @importFrom purrr map
#' @importFrom purrr compact
#' @import stringr purrr
#' @return
#' @export
#'
#' @examples
# Parses the GKG `mentionedNumericsCounts` field (";"-separated triples of
# "amountValue,amountTerm,charLoc") into a per-article table keyed by idGKG.
parse_gkg_mentioned_numerics <- function(gdelt_data,
filter_na = T,
include_char_loc = T,
return_wide = T) {
# Parses one raw field string into a tibble; an NA field yields a single
# placeholder row so map_dfr keeps the article in the output.
parse_mentioned_numerics <-
function(field = "170,Scotland Road,1600;170,Scotland Road,2475;",
return_wide = F) {
options(scipen = 99999)
if (field %>% is.na) {
if (return_wide) {
field_data <-
tibble(
amountValue1 = NA,
amountTerm1 = NA,
charLoc = NA
)
} else {
field_data <-
tibble(
amountValue = NA,
amountTerm = NA,
charLoc = NA,
idArticleNumericItem = 1
)
}
} else {
# Split on ";" and drop empty trailing segments.
fields <-
field %>%
str_split('\\;') %>%
flatten_chr() %>%
.[!. %in% '']
fields_df <-
tibble(field = fields) %>%
dplyr::mutate(idArticleNumericItem = 1:n()) %>%
separate(
field,
into = c('amountValue', 'amountTerm', 'charLoc'),
sep = '\\,'
) %>%
dplyr::mutate(amountTerm = amountTerm %>% str_trim) %>%
suppressMessages() %>%
suppressWarnings()
if (return_wide) {
# Wide form: one column per item, named like amountValue.1, charLoc.1, ...
fields_df <-
fields_df %>%
gather(item, value, -idArticleNumericItem) %>%
arrange(idArticleNumericItem) %>%
unite(item, item, idArticleNumericItem, sep = '.')
order_fields <-
fields_df$item
field_data <-
fields_df %>%
spread(item, value) %>%
dplyr::select_(.dots = order_fields)
# Coerce numeric-looking columns back (spread made everything character).
field_data <-
field_data %>%
dplyr::mutate_at(
field_data %>% dplyr::select(dplyr::matches('amountValue|charLoc')) %>% names(),
funs(. %>% as.character() %>% parse_number())
)
} else {
field_data <-
fields_df
field_data <-
field_data %>%
dplyr::select(idArticleNumericItem,
amountValue,
amountTerm,
charLoc) %>%
dplyr::mutate(charLoc = charLoc %>% as.numeric,
amountValue = amountValue %>% as.numeric)
}
}
return(field_data)
}
if (!'mentionedNumericsCounts' %in% names(gdelt_data)) {
stop("Sorry missing numeric count column")
}
counts_data <-
gdelt_data %>%
dplyr::select(idGKG, mentionedNumericsCounts)
if (filter_na) {
counts_data <-
counts_data %>%
dplyr::filter(!mentionedNumericsCounts %>% is.na)
}
# Parse every article's field and tag each result with its idGKG.
all_counts <-
seq_along(counts_data$mentionedNumericsCounts) %>%
map_dfr(function(x) {
parse_mentioned_numerics(field = counts_data$mentionedNumericsCounts[x],
return_wide = F) %>%
dplyr::mutate(idGKG = counts_data$idGKG[x])
}) %>%
dplyr::select(idGKG, everything())
if (include_char_loc == F) {
all_counts <-
all_counts %>%
dplyr::select(-charLoc)
}
# Standardize shape and sort by the numeric component of idGKG.
all_counts <-
all_counts %>%
get_clean_count_data(count_col = 'idArticleNumericItem', return_wide = return_wide) %>%
separate(
idGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime) %>%
dplyr::select(-c(dateTime, GKG)) %>%
suppressWarnings()
if (!return_wide) {
all_counts <-
all_counts %>%
.resolve_long_names()
}
return(all_counts)
}
#' Returns long or wide mentioned people from a GKG data frame
#'
#' @param gdelt_data
#' @param people_column options \code{c('person', 'persons', 'persons.count', 'personsCharLoc', 'charLoc')}
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
# Parses a GKG people field (";"-separated "namePerson,charLoc" pairs) into a
# per-article table keyed by idGKG. Accepts several aliases for the source
# column and normalizes them to 'persons' or 'personsCharLoc'.
parse_gkg_mentioned_people <- function(gdelt_data,
people_column = 'personsCharLoc',
filter_na = T,
return_wide = T) {
# Accepted aliases for the people column.
people_count_cols <-
c('person',
'persons',
'persons.count',
'personsCharLoc',
'charLoc')
if (!people_column %in% people_count_cols) {
stop("Sorry people column can only be\n" %>%
paste0(paste0(people_count_cols, collapse = '\n')))
}
# Normalize aliases to the two real column names.
if (people_column %in% c('person', 'persons')) {
people_column <-
'persons'
}
if (people_column %in% c('persons.count', 'personsCharLoc', 'charLoc')) {
people_column <-
'personsCharLoc'
}
# Parses one raw field string; an NA field yields a single placeholder row.
parse_mentioned_people_counts <-
function(field = "Chaudhry Nisar Ali Khan,63",
return_wide = F) {
options(scipen = 99999)
if (field %>% is.na) {
if (return_wide) {
field_data <-
tibble(namePerson1 = NA, charLoc1 = NA)
} else {
field_data <-
tibble(
namePerson = NA,
charLoc = NA,
idArticlePerson = 1
)
}
} else {
# Split on ";" and drop empty trailing segments.
fields <-
field %>%
str_split('\\;') %>%
flatten_chr() %>%
.[!. %in% '']
fields_df <-
tibble(field = fields) %>%
dplyr::mutate(idArticlePerson = 1:n()) %>%
separate(field,
into = c('namePerson', 'charLoc'),
sep = '\\,') %>%
suppressWarnings() %>%
suppressMessages()
if (return_wide) {
# Wide form: one column per person, named namePerson.1, charLoc.1, ...
fields_df <-
fields_df %>%
gather(item, value, -idArticlePerson) %>%
arrange(idArticlePerson) %>%
unite(item, item, idArticlePerson, sep = '.') %>%
suppressWarnings()
order_fields <-
fields_df$item
field_data <-
fields_df %>%
spread(item, value) %>%
dplyr::select_(.dots = order_fields)
# spread made everything character; restore numeric charLoc columns.
field_data <-
field_data %>%
mutate_at(
field_data %>% dplyr::select(dplyr::matches('charLoc')) %>% names(),
funs(. %>% as.character() %>% parse_number())
)
} else {
field_data <-
fields_df
field_data <-
field_data %>%
dplyr::select(idArticlePerson, namePerson, charLoc) %>%
dplyr::mutate(charLoc = charLoc %>% as.numeric)
}
}
return(field_data)
}
if (!people_column %in% names(gdelt_data)) {
stop("Sorry missing people column")
}
col_names <-
c('idGKG', people_column)
counts_data <-
gdelt_data %>%
dplyr::select_(.dots = col_names)
# Standardize the source column's name so the code below is alias-agnostic.
names(counts_data)[2] <-
'people_col'
if (filter_na) {
counts_data <-
counts_data %>%
dplyr::filter(!people_col %>% is.na())
}
# Parse every article's field and tag each result with its idGKG.
all_counts <-
seq_along(counts_data$people_col) %>%
map_dfr(function(x) {
parse_mentioned_people_counts(field = counts_data$people_col[x],
return_wide = F) %>%
dplyr::mutate(idGKG = counts_data$idGKG[x])
}) %>%
dplyr::select(idGKG, everything())
if (people_column == 'persons') {
# The plain 'persons' field carries no character locations.
all_counts <-
all_counts %>%
dplyr::select(-charLoc)
}
# Standardize shape and sort by the numeric component of idGKG.
all_counts <-
all_counts %>%
get_clean_count_data(count_col = 'idArticlePerson', return_wide = return_wide) %>%
separate(
idGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime) %>%
dplyr::select(-c(dateTime, GKG)) %>%
suppressWarnings()
if (!return_wide) {
all_counts <-
all_counts %>%
.resolve_long_names()
}
return(all_counts)
}
#' Returns long or wide mentioned organizations from a GKG data frame
#'
#' @param gdelt_data
#' @param organization_column options \code{c('organization', 'organizations', 'organizations.count', 'organizationsCharLoc', 'charLoc')}
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
# Parses a GKG organizations field (";"-separated "nameOrganization,charLoc"
# pairs) into a per-article table keyed by idGKG. Accepts several aliases for
# the source column and normalizes them to 'organizations' or
# 'organizationsCharLoc'.
parse_gkg_mentioned_organizations <- function(gdelt_data,
                                              organization_column = 'organizations',
                                              filter_na = T,
                                              return_wide = T) {
  # Accepted aliases for the organization column.
  organization_count_cols <-
    c(
      'organization',
      'organizations',
      'organizations.count',
      'organizationsCharLoc',
      'charLoc'
    )
  if (!organization_column %in% organization_count_cols) {
    # FIX: message previously said "people column" (copy-paste from the
    # people parser).
    stop("Sorry organization column can only be\n" %>%
           paste0(paste0(organization_count_cols, collapse = '\n')))
  }
  # Normalize aliases to the two real column names.
  if (organization_column %in% c('organization', 'organizations')) {
    organization_column <-
      'organizations'
  }
  if (organization_column %in% c('organizations.count',
                                 'organizationsCharLoc',
                                 'charLoc')) {
    # FIX: previously normalized to 'personsCharLoc' (copy-paste bug), so any
    # char-loc alias failed the column-existence check below on organization
    # data.
    organization_column <-
      'organizationsCharLoc'
  }
  # Parses one raw field string; an NA field yields a single placeholder row
  # so map_dfr keeps the article in the output.
  parse_mentioned_organization_counts <-
    function(field = "Twitter,2912;Pegasystems,169;Pegasystems,1238;Pegasystems,1829;Pegasystems,2079;Pegasystems,2888;Nasdaq,193;Nasdaq,2086;Ilena Ryan Pegasystems Inc,2892;Pegasystems Inc,173;Pegasystems Inc,2892",
             return_wide = F) {
      options(scipen = 99999)
      if (field %>% is.na) {
        if (return_wide) {
          field_data <-
            tibble(nameOrganization1 = NA,
                   charLoc1 = NA)
        } else {
          field_data <-
            tibble(
              nameOrganization = NA,
              charLoc = NA,
              idArticle.organization = 1
            )
        }
      } else {
        # Split on ";" and drop empty trailing segments.
        fields <-
          field %>%
          str_split('\\;') %>%
          flatten_chr() %>%
          .[!. %in% '']
        fields_df <-
          tibble(field = fields) %>%
          dplyr::mutate(idArticle.organization = 1:n()) %>%
          separate(field,
                   into = c('nameOrganization', 'charLoc'),
                   sep = '\\,') %>%
          suppressMessages() %>%
          suppressWarnings()
        if (return_wide) {
          # Wide form: one column per organization, suffixed by item index.
          fields_df <-
            fields_df %>%
            gather(item, value, -idArticle.organization) %>%
            arrange(idArticle.organization) %>%
            unite(item, item, idArticle.organization, sep = '.')
          order_fields <-
            fields_df$item
          field_data <-
            fields_df %>%
            spread(item, value) %>%
            dplyr::select_(.dots = order_fields)
          # spread made everything character; restore numeric charLoc columns.
          field_data <-
            field_data %>%
            mutate_at(
              field_data %>% dplyr::select(dplyr::matches('charLoc')) %>% names(),
              funs(. %>% as.character() %>% parse_number())
            )
        } else {
          field_data <-
            fields_df
          field_data <-
            field_data %>%
            dplyr::select(idArticle.organization, nameOrganization, charLoc) %>%
            dplyr::mutate(charLoc = charLoc %>% as.numeric)
        }
      }
      return(field_data)
    }
  if (!organization_column %in% names(gdelt_data)) {
    stop("Sorry missing organization column")
  }
  col_names <-
    c('idGKG', organization_column)
  counts_data <-
    gdelt_data %>%
    dplyr::select_(.dots = col_names)
  # Standardize the source column's name so the code below is alias-agnostic.
  names(counts_data)[2] <-
    'org_col'
  if (filter_na) {
    counts_data <-
      counts_data %>%
      dplyr::filter(!org_col %>% is.na())
  }
  # Parse every article's field and tag each result with its idGKG.
  all_counts <-
    seq_along(counts_data$org_col) %>%
    map_dfr(function(x) {
      parse_mentioned_organization_counts(field = counts_data$org_col[x],
                                          return_wide = F) %>%
        dplyr::mutate(idGKG = counts_data$idGKG[x])
    }) %>%
    dplyr::select(idGKG, everything())
  if (organization_column == 'organizations') {
    # The plain 'organizations' field carries no character locations.
    all_counts <-
      all_counts %>%
      dplyr::select(-charLoc)
  }
  # Standardize shape and sort by the numeric component of idGKG.
  all_counts <-
    all_counts %>%
    get_clean_count_data(count_col = 'idArticle.organization', return_wide = return_wide) %>%
    separate(
      idGKG,
      into = c('GKG', 'dateTime'),
      sep = '\\-',
      remove = F
    ) %>%
    mutate(dateTime = dateTime %>% as.numeric) %>%
    arrange(dateTime) %>%
    dplyr::select(-c(dateTime, GKG)) %>%
    suppressWarnings()
  if (!return_wide) {
    all_counts <-
      all_counts %>%
      .resolve_long_names()
  }
  return(all_counts)
}
#' Returns mentioned names from a GKG data frame.
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
# Parses the GKG `mentionedNamesCounts` field (";"-separated
# "nameMentionedName,charLoc" pairs) into a per-article table keyed by idGKG.
parse_gkg_mentioned_names <- function(gdelt_data,
                                      filter_na = T,
                                      return_wide = T) {
  # Parses one raw field string; an NA field yields a single placeholder row
  # so map_dfr keeps the article in the output.
  parse_mentioned_names_counts <-
    function(field = "Interior Minister Chaudhry Nisar Ali Khan,47;Mullah Mansour,87;Afghan Taliban,180;Mullah Mansour,382;Mullah Mansor,753;Mullah Mansour,815;Mullah Mansour,1025",
             return_wide = F) {
      # FIX: default was `return_wide = return_wide`, a self-referential
      # promise that errors if ever forced; now F, matching the sibling
      # parsers (the caller below always passes return_wide explicitly).
      options(scipen = 99999)
      if (field %>% is.na()) {
        if (return_wide) {
          field_data <-
            tibble(nameMentionedName1 = NA,
                   charLoc1 = NA)
        } else {
          field_data <-
            tibble(
              nameMentionedName = NA,
              charLoc = NA,
              idArticleMentionedName = 1
            )
        }
      } else {
        # Split on ";" and drop empty trailing segments.
        fields <-
          field %>%
          str_split('\\;') %>%
          flatten_chr() %>%
          .[!. %in% '']
        fields_df <-
          tibble(field = fields) %>%
          dplyr::mutate(idArticleMentionedName = 1:n()) %>%
          separate(field,
                   into = c('nameMentionedName', 'charLoc'),
                   sep = '\\,') %>%
          mutate(charLoc = charLoc %>% as.numeric()) %>%
          suppressMessages() %>%
          suppressWarnings()
        if (return_wide) {
          # Wide form: one column per name, suffixed by item index; charLoc is
          # held out of the gather and dropped before spreading.
          fields_df <-
            fields_df %>%
            gather(item, value, -c(idArticleMentionedName, charLoc)) %>%
            arrange(idArticleMentionedName) %>%
            unite(item, item, idArticleMentionedName, sep = '')
          order_fields <-
            fields_df$item
          field_data <-
            fields_df %>%
            dplyr::select(-dplyr::matches("charLoc")) %>%
            spread(item, value) %>%
            dplyr::select_(.dots = order_fields)
          field_data <-
            field_data %>%
            mutate_at(
              field_data %>% dplyr::select(dplyr::matches('charLoc')) %>% names(),
              funs(. %>% as.character() %>% parse_number())
            )
        } else {
          field_data <-
            fields_df
          field_data <-
            field_data %>%
            dplyr::select(idArticleMentionedName,
                          charLoc,
                          nameMentionedName)
        }
      }
      return(field_data)
    }
  if (!'mentionedNamesCounts' %in% names(gdelt_data)) {
    # FIX: typo "metioned" in the error message.
    stop("Sorry missing mentioned name column")
  }
  counts_data <-
    gdelt_data %>%
    dplyr::select(idGKG, mentionedNamesCounts)
  # Parse every article's field and tag each result with its idGKG.
  all_counts <-
    seq_along(counts_data$mentionedNamesCounts) %>%
    map_dfr(function(x) {
      parse_mentioned_names_counts(field = counts_data$mentionedNamesCounts[x],
                                   return_wide = F) %>%
        dplyr::mutate(idGKG = counts_data$idGKG[x])
    }) %>%
    dplyr::select(idGKG, everything())
  if (filter_na) {
    if ('nameMentionedName' %in% names(all_counts)) {
      all_counts <-
        all_counts %>%
        dplyr::filter(!nameMentionedName %>% is.na())
    }
  }
  # Standardize shape in long form first; widen afterwards if requested.
  all_counts <-
    all_counts %>%
    get_clean_count_data(count_col = 'idArticleMentionedName',
                         return_wide = F) %>%
    separate(
      idGKG,
      into = c('GKG', 'dateTime'),
      sep = '\\-',
      remove = F
    ) %>%
    mutate(dateTime = dateTime %>% as.numeric()) %>%
    select(-dplyr::matches("charLoc")) %>%
    arrange(dateTime) %>%
    dplyr::select(-c(dateTime, GKG)) %>%
    suppressWarnings()
  if (return_wide) {
    all_counts <-
      all_counts %>%
      spread(item, value)
  }
  if (!return_wide) {
    all_counts <-
      all_counts %>%
      .resolve_long_names()
  }
  return(all_counts)
}
#' Returns mentioned themes from a gkg data frame
#'
#' @param gdelt_data
#' @param filter_na
#' @param theme_column options \code{c('theme', 'themes', 'countThemes', 'themesCharLoc', 'charLoc')}
#' @param return_wide
#' @importFrom purrr map
#' @import purrr
#' @return
#' @export
#'
#' @examples
# Parses a GKG themes field (";"-separated "codeTheme,charLoc" pairs) into a
# per-article table keyed by idGKG. Accepts several aliases for the source
# column and normalizes them to 'themes' or 'themesCharLoc'.
parse_gkg_mentioned_themes <- function(gdelt_data,
                                       filter_na = T,
                                       theme_column = 'themes',
                                       return_wide = T) {
  # Accepted aliases for the theme column.
  theme_count_cols <-
    c('theme',
      'themes',
      'countThemes',
      'themesCharLoc',
      'charLoc')
  if (!theme_column %in% theme_count_cols) {
    stop("Sorry theme column can only be\n" %>%
           paste0(paste0(theme_count_cols, collapse = '\n')))
  }
  # Normalize aliases to the two real column names.
  if (theme_column %in% c('theme', 'themes')) {
    theme_column <-
      'themes'
  }
  if (theme_column %in% c('countThemes', 'themesCharLoc', 'charLoc')) {
    theme_column <-
      'themesCharLoc'
  }
  # Parses one raw field string; an NA field yields a single placeholder row.
  # NOTE(review): the default example value looks copy-pasted from the
  # social-embeds parser (URLs, not theme codes); defaults are never used by
  # the caller below, so it is left unchanged.
  parse_mentioned_names_themes <-
    function(field = "https://youtube.com/esctodaytv;https://youtube.com/embed/5ymFX91HwM0?wmode=transparent&modestbranding=1&autohide=1&showinfo=0&rel=0;",
             return_wide = F) {
      options(scipen = 99999)
      if (field %>% is.na) {
        if (return_wide) {
          field_data <-
            tibble(codeTheme1 = NA, charLoc1 = NA)
        } else {
          field_data <-
            tibble(
              codeTheme = NA,
              charLoc = NA,
              idArticleGKGTheme = 1
            )
        }
      } else {
        # Split on ";" and drop empty trailing segments.
        fields <-
          field %>%
          str_split('\\;') %>%
          flatten_chr() %>%
          .[!. %in% '']
        fields_df <-
          tibble(field = fields) %>%
          dplyr::mutate(idArticleGKGTheme = 1:n()) %>%
          separate(field,
                   into = c('codeTheme', 'charLoc'),
                   sep = '\\,') %>%
          suppressWarnings()
        if (return_wide) {
          # Wide form: one column per theme, suffixed by item index.
          fields_df <-
            fields_df %>%
            gather(item, value, -idArticleGKGTheme) %>%
            arrange(idArticleGKGTheme) %>%
            unite(item, item, idArticleGKGTheme, sep = '.')
          order_fields <-
            fields_df$item
          field_data <-
            fields_df %>%
            spread(item, value) %>%
            dplyr::select_(.dots = order_fields)
          # spread made everything character; restore numeric charLoc columns.
          field_data <-
            field_data %>%
            mutate_at(
              field_data %>% dplyr::select(dplyr::matches('charLoc')) %>% names(),
              funs(. %>% as.character() %>% parse_number())
            )
        } else {
          field_data <-
            fields_df
          field_data <-
            field_data %>%
            dplyr::select(idArticleGKGTheme, codeTheme, charLoc) %>%
            dplyr::mutate(charLoc = charLoc %>% as.numeric)
        }
      }
      return(field_data)
    }
  if (!theme_column %in% names(gdelt_data)) {
    # FIX: message previously said "organization column" (copy-paste from the
    # organizations parser).
    stop("Sorry missing theme column")
  }
  col_names <-
    c('idGKG', theme_column)
  counts_data <-
    gdelt_data %>%
    dplyr::select_(.dots = col_names)
  # Standardize the source column's name so the code below is alias-agnostic.
  names(counts_data)[2] <-
    'theme_col'
  if (filter_na) {
    counts_data <-
      counts_data %>%
      dplyr::filter(!theme_col %>% is.na())
  }
  # Parse every article's field and tag each result with its idGKG.
  all_counts <-
    seq_along(counts_data$theme_col) %>%
    map_dfr(function(x) {
      parse_mentioned_names_themes(field = counts_data$theme_col[x],
                                   return_wide = F) %>%
        dplyr::mutate(idGKG = counts_data$idGKG[x])
    }) %>%
    dplyr::select(idGKG, everything())
  if (theme_column == 'themes') {
    # The plain 'themes' field carries no character locations.
    all_counts <-
      all_counts %>%
      dplyr::select(-charLoc)
  }
  # Standardize shape and sort by the numeric component of idGKG.
  all_counts <-
    all_counts %>%
    get_clean_count_data(count_col = 'idArticleGKGTheme', return_wide = return_wide) %>%
    separate(
      idGKG,
      into = c('GKG', 'dateTime'),
      sep = '\\-',
      remove = F
    ) %>%
    mutate(dateTime = dateTime %>% as.numeric) %>%
    arrange(dateTime) %>%
    dplyr::select(-c(dateTime, GKG)) %>%
    suppressWarnings()
  if (!return_wide) {
    all_counts <-
      all_counts %>%
      .resolve_long_names()
  }
  return(all_counts)
}
#' Returns social embed information from a gkg data frame
#'
#' @param gdelt_data
#' @param social_embed_column options \code{c('urlSocialMediaImageEmbeds', 'images', 'urlSocialMediaVideoEmbeds', 'video', 'videos'))}
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
# Parses a GKG social-media embed field (";"-separated URLs) into a
# per-article table keyed by idGKG. Works for both image and video embed
# columns; aliases are normalized to the two real column names.
parse_gkg_mentioned_social_embeds <-
  function(gdelt_data,
           social_embed_column = 'urlSocialMediaImageEmbeds',
           filter_na = T,
           return_wide = T) {
    # Accepted aliases for the embed column.
    image_video_cols <-
      c(
        'urlSocialMediaImageEmbeds',
        'images',
        'image',
        'urlSocialMediaVideoEmbeds',
        'video',
        'videos'
      )
    if (!social_embed_column %in% image_video_cols) {
      stop("Social embed column can only be\n" %>% paste0(paste0(image_video_cols, collapse = '\n')))
    }
    if (social_embed_column %in% c("image", "images")) {
      social_embed_column <-
        'urlSocialMediaImageEmbeds'
    }
    if (social_embed_column %in% c("video", "videos")) {
      social_embed_column <-
        'urlSocialMediaVideoEmbeds'
    }
    # Parses one raw field string into (id, domain, url) rows; an NA field
    # yields a single placeholder row. Column names are generic "image" names
    # here and are renamed per embed type after the map below.
    parse_embeds <-
      function(field = "http://instagram.com/p/9YfHJtMx0N;http://instagram.com/p/BFz1t7Tsx8t;http://instagram.com/p/BEZrBSKsx8U;http://instagram.com/p/BEw5_T-Mx3B;",
               return_wide = F) {
        options(scipen = 99999)
        if (field %>% is.na) {
          if (return_wide) {
            field_data <-
              tibble(urlSocialMediaImageEmbed = NA)
          } else {
            field_data <-
              tibble(
                urlSocialMediaImageEmbed = NA,
                idArticleSocialMediaImageEmbed = 1
              )
          }
        } else {
          # Split on ";" and drop empty trailing segments.
          fields <-
            field %>%
            str_split('\\;') %>%
            flatten_chr() %>%
            .[!. %in% '']
          fields_df <-
            tibble(urlSocialMediaImageEmbed = fields) %>%
            dplyr::mutate(
              idArticleSocialMediaImageEmbed = 1:n(),
              domainSocialMediaImageEmbed = urlSocialMediaImageEmbed %>% urltools::domain()
            )
          if (return_wide) {
            # Wide form: one column per embed, suffixed by item index.
            fields_df <-
              fields_df %>%
              gather(item, value, -idArticleSocialMediaImageEmbed) %>%
              arrange(idArticleSocialMediaImageEmbed) %>%
              unite(item, item, idArticleSocialMediaImageEmbed, sep = '.')
            order_fields <-
              fields_df$item
            field_data <-
              fields_df %>%
              spread(item, value) %>%
              dplyr::select_(.dots = order_fields)
          } else {
            field_data <-
              fields_df
            field_data <-
              field_data %>%
              dplyr::select(
                idArticleSocialMediaImageEmbed,
                domainSocialMediaImageEmbed,
                urlSocialMediaImageEmbed
              )
          }
        }
        return(field_data)
      }
    if (!social_embed_column %in% names(gdelt_data)) {
      stop("Sorry missing source embed column")
    }
    col_names <-
      c('idGKG', social_embed_column)
    counts_data <-
      gdelt_data %>%
      dplyr::select_(.dots = col_names)
    # Standardize the source column's name so the code below is alias-agnostic.
    names(counts_data)[2] <-
      'source_col'
    if (filter_na) {
      counts_data <-
        counts_data %>%
        dplyr::filter(!source_col %>% is.na())
    }
    # Parse every article's field and tag each result with its idGKG.
    all_counts <-
      seq_along(counts_data$source_col) %>%
      map_dfr(function(x) {
        parse_embeds(field = counts_data$source_col[x],
                     return_wide = F) %>%
          dplyr::mutate(idGKG = counts_data$idGKG[x])
      })
    # Rename the generic parser columns by position to match the embed type.
    names(all_counts)[1] <-
      'idColumn'
    if (social_embed_column == 'urlSocialMediaImageEmbeds') {
      names(all_counts)[3] <-
        c('urlSocialMediaImageEmbed')
      names(all_counts)[1] <-
        c('idArticleSocialMediaImageEmbed')
    }
    if (social_embed_column == 'urlSocialMediaVideoEmbeds') {
      names(all_counts)[3] <-
        c('urlSocialMediaVideo.embed')
      names(all_counts)[1] <-
        c('idArticle.social_media.video_embed')
    }
    # FIX: the regex was "id^[A-Z]" — a mid-pattern '^' anchor never matches,
    # so the id column was never moved first. "^id[A-Z]" matches the intended
    # columns and mirrors the lookup on the next statement.
    all_counts <-
      all_counts %>%
      dplyr::select(dplyr::matches("^id[A-Z]"), everything())
    id_col <-
      all_counts %>% select(dplyr::matches("^id[A-Z]")) %>% names() %>% .[[1]] %>% suppressWarnings()
    # Standardize shape and sort by the numeric component of idGKG.
    all_counts <-
      all_counts %>%
      get_clean_count_data(count_col = id_col, return_wide = return_wide) %>%
      separate(
        idGKG,
        into = c('GKG', 'dateTime'),
        sep = '\\-',
        remove = F
      ) %>%
      mutate(dateTime = dateTime %>% as.numeric) %>%
      arrange(dateTime) %>%
      dplyr::select(-c(dateTime, GKG)) %>%
      suppressWarnings()
    if (!return_wide) {
      all_counts <-
        all_counts %>%
        gather(item, value, -idGKG, na.rm = TRUE) %>%
        .resolve_long_names()
    }
    return(all_counts)
  }
#' Returns article tones from a gkg data frame
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
# Parses the GKG `tone` field (a comma-separated string of tone metrics) into
# a per-article table of scoreTone values keyed by idGKG.
parse_gkg_mentioned_article_tone <-
  function(gdelt_data,
           filter_na = T,
           return_wide = T) {
    # Turns one raw tone string into a tibble; an NA field produces a single
    # placeholder row so every article survives the row-bind.
    parse_article_tones <-
      function(field = "-4.65116279069767,1.55038759689922,62015503875969,7.75193798449612,13.1782945736434,0,134",
               return_wide = F) {
        options(scipen = 99999, digits = 5)
        if (is.na(field)) {
          if (return_wide) {
            return(tibble(amount.tone = NA))
          }
          return(tibble(amount.tone = NA, idArticle.tone = 1))
        }
        # Split the comma list, discard empty fragments, coerce to numeric.
        raw_values <- flatten_chr(str_split(field, '\\,'))
        tone_values <- as.numeric(raw_values[!raw_values %in% ''])
        parsed <-
          tibble(amount.tone = tone_values) %>%
          dplyr::mutate(idArticle.tone = 1:n())
        if (!return_wide) {
          return(dplyr::select(parsed, idArticle.tone, amount.tone))
        }
        # Wide form: one column per tone component, in encounter order.
        long_form <-
          parsed %>%
          gather(item, value, -idArticle.tone) %>%
          arrange(idArticle.tone) %>%
          unite(item, item, idArticle.tone, sep = '.')
        column_order <- long_form$item
        long_form %>%
          spread(item, value) %>%
          dplyr::select_(.dots = column_order)
      }
    if (!'tone' %in% names(gdelt_data)) {
      stop("Sorry missing tone column")
    }
    tone_source <-
      gdelt_data %>%
      dplyr::select(idGKG, tone)
    # Parse each article's tone string and label it with its idGKG.
    all_counts <-
      seq_along(tone_source$tone) %>%
      map_dfr(function(idx) {
        parse_article_tones(field = tone_source$tone[idx], return_wide = F) %>%
          dplyr::mutate(idGKG = tone_source$idGKG[idx])
      }) %>%
      dplyr::select(idGKG, everything()) %>%
      dplyr::rename(scoreTone = amount.tone)
    if (filter_na) {
      all_counts <-
        all_counts %>%
        dplyr::filter(!is.na(scoreTone))
    }
    # Standardize shape and sort by the numeric component of idGKG.
    all_counts <-
      all_counts %>%
      get_clean_count_data(count_col = 'idArticle.tone',
                           extra_key = NA,
                           return_wide = return_wide) %>%
      arrange(idGKG) %>%
      separate(
        idGKG,
        into = c('GKG', 'dateTime'),
        sep = '\\-',
        remove = F
      ) %>%
      mutate(dateTime = as.numeric(dateTime)) %>%
      arrange(dateTime) %>%
      dplyr::select(-c(dateTime, GKG)) %>%
      suppressWarnings()
    if (!return_wide) {
      all_counts <-
        all_counts %>%
        .resolve_long_names()
    }
    all_counts
  }
#' Returns mentioned CAMEO event count from a gkg data frame
#'
#' @param gdelt_data
#' @param count_column options \code{c('count', 'counts'))}
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_gkg_mentioned_event_counts <- function(gdelt_data,
                                             count_column = 'counts',
                                             filter_na = T,
                                             return_wide = T) {
  # Parses a GKG count field (records separated by ';', sub-fields by '#')
  # into one row per mentioned event count, keyed back to idGKG.
  #
  # Args:
  #   gdelt_data: gkg data frame containing idGKG and the chosen count column.
  #   count_column: alias for one of the two raw columns; normalized below.
  #   filter_na: drop rows whose raw count field is NA before parsing.
  #   return_wide: TRUE for one wide row per article, FALSE for long form.
  count_cols <-
    c('counts',
      'count',
      'countsCharLoc',
      'countCharLoc',
      'charLoc')
  if (!count_column %in% count_cols) {
    stop("Sorry count column can only be\n" %>%
           paste0(paste0(count_cols, collapse = '\n')))
  }
  # Collapse user-supplied aliases onto the two canonical column names.
  if (count_column %in% c('counts', 'count')) {
    count_column <-
      'counts'
  }
  if (count_column %in% c('countsCharLoc', 'countCharLoc', 'charLoc')) {
    count_column <-
      'countsCharLoc'
  }
  # Parses one raw field string into a tibble; each record carries the eleven
  # '#'-separated positions named in the separate() call below.
  parse_field_count <-
    function(field = "KIDNAP#60##4#Beirut, Beyrouth, Lebanon#LE#LE04#33.8719#35.5097#-801546;KIDNAP#2##1#Lebanon#LE#LE#33.8333#35.8333#LE;KIDNAP#4##1#Australia#AS#AS#-27#133#AS;",
             return_wide = F) {
      options(scipen = 99999, digits = 5)
      if (field %>% is.na) {
        # NA field: emit a single all-NA placeholder row.
        if (return_wide) {
          field_data <-
            tibble(codeGKGTheme = NA)
        } else {
          field_data <-
            tibble(codeGKGTheme = NA,
                   idArticle.field = 1)
        }
      } else {
        # Split into individual count records, dropping empty trailing pieces.
        fields <-
          field %>%
          str_split('\\;') %>%
          flatten_chr() %>%
          .[!. %in% '']
        fields_df <-
          tibble(field_item = fields) %>%
          dplyr::mutate(idArticle.field = 1:n()) %>%
          separate(
            col = field_item,
            sep = '\\#',
            into = c(
              'codeGKGTheme',
              'countEvent',
              'entityEvent',
              'idTypeLocation',
              'location',
              'idCountry',
              'idADM1Code',
              'latitude',
              'longitude',
              'idFeature',
              'charLoc'
            )
          ) %>%
          suppressMessages() %>%
          suppressWarnings()
        # Coerce numeric-looking columns; idCountry is dropped from the match
        # because its name also matches the "count" pattern.
        fields_df <-
          fields_df %>%
          mutate_at(
            fields_df %>% dplyr::select(dplyr::matches("count|charLoc|idTypeLocation")) %>% dplyr::select(-idCountry) %>% names(),
            funs(. %>% as.numeric())
          ) %>%
          mutate_at(fields_df %>% dplyr::select(dplyr::matches("latitude|longitude")) %>% names(),
                    (funs(as.numeric(., digits = 5))))
        # Empty strings produced by separate() become explicit NAs.
        fields_df$entityEvent[fields_df$entityEvent == ''] <-
          NA
        fields_df$location[fields_df$location == ''] <-
          NA
        fields_df$idCountry[fields_df$idCountry == ''] <-
          NA
        fields_df$idADM1Code[fields_df$idADM1Code == ''] <-
          NA
        fields_df$idFeature[fields_df$idFeature == ''] <-
          NA
        # Attach a human-readable label for the 1-5 location type codes.
        fields_df <-
          fields_df %>%
          dplyr::left_join(tibble(
            idTypeLocation = 1:5,
            typeLocation = c(
              'country',
              'usState',
              'usCity',
              'worldCity',
              'worldState'
            )
          )) %>%
          dplyr::select(codeGKGTheme:idTypeLocation,
                        typeLocation,
                        everything()) %>%
          suppressMessages()
        if (return_wide) {
          # Wide form: item names gain a .<recordIndex> suffix, then spread.
          fields_df <-
            fields_df %>%
            gather(item, value, -idArticle.field) %>%
            arrange(idArticle.field) %>%
            unite(item, item, idArticle.field, sep = '.')
          order_fields <-
            fields_df$item
          field_data <-
            fields_df %>%
            spread(item, value) %>%
            dplyr::select_(.dots = order_fields)
          # spread() stringifies values, so re-coerce the numeric columns.
          field_data <-
            field_data %>%
            mutate_at(
              fields_df %>% dplyr::select(dplyr::matches("count|charLoc|idTypeLocation")) %>% dplyr::select(-one_of("idCountry")) %>% names(),
              funs(. %>% as.numeric())
            ) %>%
            mutate_at(fields_df %>% dplyr::select(dplyr::matches("latitude|longitude")) %>% names(),
                      (funs(as.numeric(., digits = 5))))
        } else {
          field_data <-
            fields_df
          field_data <-
            field_data %>%
            dplyr::select(idArticle.field, everything())
        }
      }
      return(field_data)
    }
  if (!count_column %in% names(gdelt_data)) {
    stop("Sorry missing count column")
  }
  col_names <-
    c('idGKG', count_column)
  counts_data <-
    gdelt_data %>%
    dplyr::select_(.dots = col_names)
  # Standardize the chosen column's name so the loop below is uniform.
  names(counts_data)[2] <-
    'count_col'
  if (filter_na) {
    counts_data <-
      counts_data %>%
      dplyr::filter(!count_col %>% is.na())
  }
  # Parse each article's field and tag the resulting rows with their idGKG.
  all_counts <-
    seq_along(counts_data$count_col) %>%
    map_dfr(function(x) {
      parse_field_count(field = counts_data$count_col[x],
                        return_wide = F) %>%
        dplyr::mutate(idGKG = counts_data$idGKG[x])
    }) %>%
    dplyr::select(idGKG, everything()) %>%
    select(which(colMeans(is.na(.)) < 1))  # drop columns that are entirely NA
  # Re-key, sort chronologically via the idGKG timestamp prefix, and
  # optionally reshape back to long names.
  all_counts <-
    all_counts %>%
    get_clean_count_data(count_col = 'idArticle.field', return_wide = return_wide) %>%
    separate(
      idGKG,
      into = c('GKG', 'dateTime'),
      sep = '\\-',
      remove = F
    ) %>%
    mutate(dateTime = dateTime %>% as.numeric) %>%
    arrange(dateTime) %>%
    dplyr::select(-c(dateTime, GKG)) %>%
    suppressWarnings()
  if (!return_wide) {
    all_counts <-
      all_counts %>%
      .resolve_long_names()
  }
  return(all_counts)
}
#' Returns mentioned locations from a gkg data frame
#'
#' @param gdelt_data
#' @param location_column options \code{c('location', 'locations', 'locationsCharLoc', 'locationCharLoc', 'charLoc')}
#' @param isCharLoc
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_gkg_mentioned_locations <-
  function(gdelt_data,
           location_column = 'locations',
           isCharLoc = F,
           filter_na = T,
           return_wide = T) {
    # Parses a GKG locations field (records separated by ';', sub-fields by
    # '#') into one row per mentioned location, keyed back to idGKG.
    #
    # Args:
    #   gdelt_data: gkg data frame containing idGKG and the chosen column.
    #   location_column: alias for one of the two raw columns (see below).
    #   isCharLoc: TRUE when records carry the two extra sub-fields
    #     (idADM2Code, charLoc) of the 'locationsCharLoc' layout.
    #   filter_na: drop NA raw fields and rows whose parsed location is NA.
    #   return_wide: TRUE for one wide row per article, FALSE for long form.
    location_cols <-
      c('location',
        'locations',
        'locationsCharLoc',
        'locationCharLoc',
        'charLoc')
    if (!location_column %in% location_cols) {
      stop("Sorry location column can only be\n" %>%
             paste0(paste0(location_cols, collapse = '\n')))
    }
    # Collapse user-supplied aliases onto the two canonical column names.
    if (location_column %in% c('location', 'locations')) {
      location_column <-
        'locations'
    }
    if (location_column %in% c('locationsCharLoc', 'locationCharLoc', 'charLoc')) {
      location_column <-
        'locationsCharLoc'
    }
    # Parses one raw field string; reads isCharLoc from the enclosing scope to
    # decide between the 9-field and 7-field record layouts.
    parse_location_count <-
      function(field = "4#Leichhardt, New South Wales, Australia#AS#AS02#4944#-33.8833#151.15#-1583352#203;4#Daintree, Queensland, Australia#AS#AS04#40202#-1625#145.317#-1568710#421;4#Daintree, Queensland, Australia#AS#AS04#40202#-1625#145.317#-1568710#2224",
               return_wide = F) {
        options(scipen = 99999, digits = 5)
        if (field %>% is.na) {
          # NA field: emit a single all-NA placeholder row.
          if (return_wide) {
            field_data <-
              tibble(location = NA)
          } else {
            field_data <-
              tibble(location = NA, idArticle.location = 1)
          }
        } else {
          # Split into individual location records, dropping empty pieces.
          fields <-
            field %>%
            str_split('\\;') %>%
            flatten_chr() %>%
            .[!. %in% '']
          fields_df <-
            tibble(field_item = fields) %>%
            dplyr::mutate(idArticle.location = 1:n())
          if (isCharLoc) {
            # Extended layout with idADM2Code and character offset.
            fields_df <-
              fields_df %>%
              separate(
                col = field_item,
                sep = '\\#',
                into = c(
                  'idTypeLocation',
                  'location',
                  'idCountry',
                  'idADM1Code',
                  'idADM2Code',
                  'latitude',
                  'longitude',
                  'idFeature',
                  'charLoc'
                )
              ) %>%
              suppressMessages() %>%
              suppressWarnings()
          } else {
            # Standard 7-field layout.
            fields_df <-
              fields_df %>%
              separate(
                col = field_item,
                sep = '\\#',
                into = c(
                  'idTypeLocation',
                  'location',
                  'idCountry',
                  'idADM1Code',
                  'latitude',
                  'longitude',
                  'idFeature'
                )
              ) %>%
              suppressMessages() %>%
              suppressWarnings()
          }
          # Coerce numeric columns and attach a human-readable label for the
          # 1-5 location type codes.
          fields_df <-
            fields_df %>%
            mutate_at(
              fields_df %>% dplyr::select(dplyr::matches("idTypeLocation|charLoc")) %>% names(),
              funs(. %>% as.numeric())
            ) %>%
            mutate_at(fields_df %>% dplyr::select(dplyr::matches("latitude|longitude")) %>% names(),
                      (funs(as.numeric(., digits = 5)))) %>%
            dplyr::left_join(tibble(
              idTypeLocation = 1:5,
              typeLocation = c(
                'country',
                'usState',
                'usCity',
                'worldCity',
                'worldState'
              )
            )) %>%
            suppressMessages() %>%
            dplyr::select(idTypeLocation, typeLocation, everything()) %>%
            suppressWarnings()
          # Empty strings produced by separate() become explicit NAs.
          fields_df$location[fields_df$location == ''] <-
            NA
          fields_df$idCountry[fields_df$idCountry == ''] <-
            NA
          fields_df$idADM1Code[fields_df$idADM1Code == ''] <-
            NA
          fields_df$idFeature[fields_df$idFeature == ''] <-
            NA
          if (return_wide) {
            # Wide form: item names gain a .<recordIndex> suffix, then spread.
            fields_df <-
              fields_df %>%
              gather(item, value, -idArticle.location) %>%
              arrange(idArticle.location) %>%
              unite(item, item, idArticle.location, sep = '.')
            order_fields <-
              fields_df$item
            field_data <-
              fields_df %>%
              spread(item, value) %>%
              dplyr::select_(.dots = order_fields)
            # spread() stringifies values, so re-coerce the numeric columns.
            field_data <-
              field_data %>%
              mutate_at(
                fields_df %>% dplyr::select(dplyr::matches("idTypeLocation|charLoc")) %>% names(),
                funs(. %>% as.numeric())
              ) %>%
              mutate_at(fields_df %>% dplyr::select(dplyr::matches("latitude|longitude")) %>% names(),
                        (funs(as.numeric(., digits = 5))))
          } else {
            field_data <-
              fields_df
            field_data <-
              field_data %>%
              dplyr::select(idArticle.location, everything())
          }
        }
        return(field_data)
      }
    if (!location_column %in% names(gdelt_data)) {
      stop("Sorry missing location column")
    }
    col_names <-
      c('idGKG', location_column)
    counts_data <-
      gdelt_data %>%
      dplyr::select_(.dots = col_names)
    # Standardize the chosen column's name so the loop below is uniform.
    names(counts_data)[2] <-
      'loc_col'
    if (filter_na) {
      counts_data <-
        counts_data %>%
        dplyr::filter(!loc_col %>% is.na())
    }
    # Parse each article's field and tag the resulting rows with their idGKG.
    all_counts <-
      seq_along(counts_data$loc_col) %>%
      map_dfr(function(x) {
        parse_location_count(field = counts_data$loc_col[x],
                             return_wide = F) %>%
          dplyr::mutate(idGKG = counts_data$idGKG[x])
      }) %>%
      dplyr::select(idGKG, everything())
    # Second NA pass: drop records whose parsed location itself is NA.
    if (filter_na) {
      all_counts <-
        all_counts %>%
        dplyr::filter(!location %>% is.na())
    }
    # Re-key, sort chronologically via the idGKG timestamp prefix, and
    # optionally reshape back to long names.
    all_counts <-
      all_counts %>%
      get_clean_count_data(count_col = 'idArticle.location', return_wide = return_wide) %>%
      separate(
        idGKG,
        into = c('GKG', 'dateTime'),
        sep = '\\-',
        remove = F
      ) %>%
      mutate(dateTime = dateTime %>% as.numeric) %>%
      arrange(dateTime) %>%
      dplyr::select(-c(dateTime, GKG)) %>%
      suppressWarnings()
    if (!return_wide) {
      all_counts <-
        all_counts %>%
        .resolve_long_names()
    }
    return(all_counts)
  }
#' Returns mentioned dates from gkg data frame
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_gkg_mentioned_dates <- function(gdelt_data,
                                      filter_na = T,
                                      return_wide = T) {
  # Parses the GKG 'dates' field (records separated by ';', sub-fields by '#':
  # resolution#month#day#year#charLoc) into one row per mentioned date,
  # keyed back to idGKG.
  #
  # Args:
  #   gdelt_data: gkg data frame containing idGKG and dates columns.
  #   filter_na: drop rows whose raw dates field is NA before parsing.
  #   return_wide: TRUE for one wide row per article, FALSE for long form.
  parse_dates <-
    function(field = "4#6#16#0#734;4#4#26#0#2258",
             return_wide = F) {
      options(scipen = 99999, digits = 5)
      if (field %>% is.na) {
        # NA field: emit a single all-NA placeholder row.
        if (return_wide) {
          field_data <-
            tibble(idDateResolution = NA)
        } else {
          field_data <-
            tibble(idDateResolution = NA,
                   idDateArticle = NA)
        }
      } else {
        # Split into individual date records, dropping empty pieces.
        fields <-
          field %>%
          str_split('\\;') %>%
          flatten_chr() %>%
          .[!. %in% '']
        fields_df <-
          tibble(field_item = fields) %>%
          dplyr::mutate(idDateArticle = 1:n()) %>%
          separate(
            col = field_item,
            sep = '\\#',
            into = c('idDateResolution', 'month', 'day', 'year', 'charLoc')
          ) %>%
          suppressMessages() %>%
          suppressWarnings()
        # All sub-fields are numeric; attach a label for each resolution code.
        fields_df <-
          fields_df %>%
          mutate_at(fields_df %>% names(),
                    funs(. %>% as.numeric())) %>%
          dplyr::left_join(tibble(
            idDateResolution = 1:4,
            dateResolution = c(
              'ex_mon_date',
              'year_only',
              'month_date' ,
              'fully_resolved'
            )
          )) %>%
          suppressMessages() %>%
          dplyr::select(idDateResolution, dateResolution, everything())
        if (return_wide) {
          # Wide form: item names gain a .<recordIndex> suffix, then spread.
          fields_df <-
            fields_df %>%
            gather(item, value, -idDateArticle) %>%
            arrange(idDateArticle) %>%
            unite(item, item, idDateArticle, sep = '.')
          order_fields <-
            fields_df$item
          field_data <-
            fields_df %>%
            spread(item, value) %>%
            dplyr::select_(.dots = order_fields)
          # spread() stringifies values, so re-coerce the numeric columns.
          field_data <-
            field_data %>%
            mutate_at(field_data %>% dplyr::select(dplyr::matches(
              "idDateResolution|month|day|year|charLoc"
            )) %>% names(),
            funs(. %>% as.numeric()))
        } else {
          field_data <-
            fields_df
          field_data <-
            field_data %>%
            dplyr::select(idDateArticle, everything())
        }
      }
      return(field_data)
    }
  if (!'dates' %in% names(gdelt_data)) {
    stop("Sorry missing date column")
  }
  counts_data <-
    gdelt_data %>%
    dplyr::select(idGKG, dates)
  if (filter_na) {
    counts_data <-
      counts_data %>%
      dplyr::filter(!dates %>% is.na)
  }
  # Parse each article's field; keep the human-readable dateResolution label
  # and drop its numeric code.
  all_counts <-
    seq_along(counts_data$dates) %>%
    map_dfr(function(x) {
      parse_dates(field = counts_data$dates[x], return_wide = F) %>%
        dplyr::mutate(idGKG = counts_data$idGKG[x])
    }) %>%
    dplyr::select(idGKG, everything()) %>%
    dplyr::select(-idDateResolution)
  # Re-key, sort chronologically via the idGKG timestamp prefix, and
  # optionally reshape back to long names.
  all_counts <-
    all_counts %>%
    get_clean_count_data(count_col = 'idDateArticle', return_wide = return_wide) %>%
    separate(
      idGKG,
      into = c('GKG', 'dateTime'),
      sep = '\\-',
      remove = F
    ) %>%
    mutate(dateTime = dateTime %>% as.numeric) %>%
    arrange(dateTime) %>%
    dplyr::select(-c(dateTime, GKG)) %>%
    suppressWarnings()
  if (!return_wide) {
    all_counts <-
      all_counts %>%
      .resolve_long_names()
  }
  return(all_counts)
}
#' Returns mentioned quotes from a gkg data frame
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_gkg_mentioned_quotes <-
  function(gdelt_data,
           filter_na = T,
           return_wide = T) {
    # Parses the GKG 'quotations' field (records separated by '#', sub-fields
    # by '|': charLoc|length|verbIntro|text) into one row per quotation,
    # keyed back to idGKG.
    #
    # Args:
    #   gdelt_data: gkg data frame containing idGKG and quotations columns.
    #   filter_na: drop rows whose raw quotations field is NA before parsing.
    #   return_wide: TRUE for one wide row per article, FALSE for long form.
    parse_quotes <-
      function(field = "495|51||knowingly aided and abetted an international kidnap#865|50||nothing less than an international child abduction#2764|49|| staff member should be singled out for dismissal#3373|48||make any serious attempt to independently verify#4059|46||wants to go through every single little detail#4802|156||And xC2 ; xA0 ; you're keeping all of xC2 ; xA0 ; them xC2 ; xA0 ; - xC2 ; xA0 ; except one sacrificial lamb - xC2 ; xA0 ; to run the show?#4879|28||How do you think that looks?#6093|60||an extraordinary conspiracy to remove the children illegally#6150|50||nothing less than an international child abduction#6828|408||I xE2 ; x80 ; xA6 ; have found nothing that supports a finding that any Australian Government official somehow knowingly assisted the mother to do something that was wrong xE2 ; x80 ; xA6 ; I do not find xE2 ; x80 ; xA6 ; that any Australian Embassy officials who helped the mother did so knowing that the mother did not have the father consent to remove the girls permanently from Italy",
               return_wide = F) {
        options(scipen = 99999, digits = 5)
        if (field %>% is.na) {
          # NA field: emit a single all-NA placeholder row.
          if (return_wide) {
            field_data <-
              tibble(idArticleQuote = NA)
          } else {
            field_data <-
              tibble(quote = NA,
                     idArticleQuote = NA)
          }
        } else {
          # Split into individual quote records, dropping empty pieces.
          fields <-
            field %>%
            str_split('\\#') %>%
            flatten_chr() %>%
            .[!. %in% '']
          fields_df <-
            tibble(quote_items = fields) %>%
            dplyr::mutate(idArticleQuote = 1:n()) %>%
            separate(
              col = quote_items,
              sep = '\\|',
              into = c('charLoc', 'lengthQuote', 'verbIntro', 'textQuote')
            ) %>%
            suppressMessages() %>%
            suppressWarnings()
          # Trim the quote text and coerce the numeric sub-fields.
          fields_df <-
            fields_df %>%
            dplyr::mutate(textQuote = textQuote %>% str_trim) %>%
            mutate_at(c('charLoc', 'lengthQuote'),
                      funs(. %>% as.numeric())) %>%
            dplyr::select(idArticleQuote, everything())
          # Empty introductory verbs become explicit NAs.
          fields_df$verbIntro[fields_df$verbIntro == ''] <-
            NA
          if (return_wide) {
            # Wide form: item names gain a .<recordIndex> suffix, then spread.
            fields_df <-
              fields_df %>%
              gather(item, value, -idArticleQuote) %>%
              arrange(idArticleQuote) %>%
              unite(item, item, idArticleQuote, sep = '.')
            order_fields <-
              fields_df$item
            field_data <-
              fields_df %>%
              spread(item, value) %>%
              dplyr::select_(.dots = order_fields)
            # spread() stringifies values, so re-coerce the numeric columns.
            field_data <-
              field_data %>%
              mutate_at(field_data %>% dplyr::select(dplyr::matches("charLoc|lengthQuote")) %>% names(),
                        funs(. %>% as.numeric()))
          } else {
            field_data <-
              fields_df
            field_data <-
              field_data %>%
              dplyr::select(idArticleQuote, everything())
          }
        }
        return(field_data)
      }
    if (!'quotations' %in% names(gdelt_data)) {
      stop("Sorry missing quotations column")
    }
    counts_data <-
      gdelt_data %>%
      dplyr::select(idGKG, quotations)
    if (filter_na) {
      counts_data <-
        counts_data %>%
        dplyr::filter(!quotations %>% is.na())
    }
    # Parse each article's field and tag the resulting rows with their idGKG.
    all_counts <-
      seq_along(counts_data$quotations) %>%
      map_dfr(function(x) {
        parse_quotes(field = counts_data$quotations[x], return_wide = F) %>%
          dplyr::mutate(idGKG = counts_data$idGKG[x])
      }) %>%
      dplyr::select(idGKG, everything())
    # Drop verbIntro when it is NA for every row.
    if (all_counts$verbIntro %>% is.na() %>% as.numeric() %>% sum == nrow(all_counts)) {
      all_counts <-
        all_counts %>%
        dplyr::select(-verbIntro)
    }
    # NOTE(review): return_wide is hard-coded to T here, unlike the sibling
    # parsers; the long form is rebuilt by the gather() below — confirm this
    # round-trip is intentional.
    all_counts <-
      all_counts %>%
      get_clean_count_data(count_col = 'idArticleQuote', return_wide = T) %>%
      separate(
        idGKG,
        into = c('GKG', 'dateTime'),
        sep = '\\-',
        remove = F
      ) %>%
      mutate(dateTime = dateTime %>% as.numeric) %>%
      arrange(dateTime) %>%
      dplyr::select(-c(dateTime, GKG)) %>%
      suppressWarnings()
    if (!return_wide) {
      all_counts <-
        all_counts %>%
        gather(item, value, -idGKG, na.rm = TRUE) %>%
        .resolve_long_names()
    }
    return(all_counts)
  }
#' Returns GCAM codes from a gkg data frame
#'
#' @param gdelt_data
#' @param merge_gcam_codes
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_gkg_mentioned_gcams <- function(gdelt_data,
                                      merge_gcam_codes = F,
                                      filter_na = T,
                                      return_wide = T) {
  # Parses the GKG 'gcam' field ("wc:<wordCount>,<idGCAM>:<score>,...") into
  # one row per GCAM dimension, keyed back to idGKG.
  #
  # Args:
  #   gdelt_data: gkg data frame containing idGKG and gcam columns.
  #   merge_gcam_codes: join human-readable GCAM code descriptions.
  #   filter_na: drop rows whose raw gcam field is NA before parsing.
  #   return_wide: TRUE for one wide row per article, FALSE for long form.
  parse_gcam_data <-
    function(field = "wc:284,c121:5",
             return_wide = F) {
      options(scipen = 99999)
      if (field %>% is.na) {
        # NA field: emit a single all-NA placeholder row.
        if (return_wide) {
          field_data <-
            tibble(idGCAM = NA)
        } else {
          field_data <-
            tibble(idGCAM = NA,
                   idArticleGCAM = 1)
        }
      } else {
        fields <-
          field %>%
          str_split('\\,') %>%
          flatten_chr() %>%
          .[!. %in% '']
        # First entry is always the article word count ("wc:<n>").
        articleWordCount <-
          fields[1] %>%
          as.character() %>%
          parse_number()
        fields_df <-
          tibble(articleWordCount,
                 idGCAM = fields[2:length(fields)]) %>%
          separate(idGCAM,
                   into = c('idGCAM', 'scoreGoldsteinWords'),
                   sep = '\\:') %>%
          dplyr::mutate(
            idArticleGCAM = 1:n(),
            scoreGoldsteinWords = scoreGoldsteinWords %>% as.numeric(., digits = 4)
          )
        if (return_wide) {
          # Wide form: item names gain a .<recordIndex> suffix, then spread.
          fields_df <-
            fields_df %>%
            gather(item, value, -c(articleWordCount, idArticleGCAM)) %>%
            arrange(idArticleGCAM) %>%
            unite(item, item, idArticleGCAM, sep = '.')
          order_fields <-
            fields_df$item
          field_data <-
            fields_df %>%
            spread(item, value) %>%
            dplyr::select_(.dots = order_fields)
          # spread() stringifies values, so re-coerce the scores.
          field_data <-
            field_data %>%
            mutate_at(field_data %>% dplyr::select(dplyr::matches("scoreGoldsteinWords")) %>% names(),
                      funs(. %>% as.numeric()))
        } else {
          field_data <-
            fields_df
          field_data <-
            field_data %>%
            dplyr::select(idArticleGCAM, everything())
        }
      }
      return(field_data)
    }
  if (!'gcam' %in% names(gdelt_data)) {
    # BUG FIX: message previously said "missing video embed column",
    # which does not match the column actually checked.
    stop("Sorry missing gcam column")
  }
  counts_data <-
    gdelt_data %>%
    dplyr::select(idGKG, gcam)
  if (filter_na) {
    counts_data <-
      counts_data %>%
      dplyr::filter(!gcam %>% is.na())
  }
  # Parse each article's field and tag the resulting rows with their idGKG.
  all_counts <-
    seq_along(counts_data$gcam) %>%
    map_dfr(function(x) {
      parse_gcam_data(field = counts_data$gcam[x],
                      return_wide = F) %>%
        dplyr::mutate(idGKG = counts_data$idGKG[x])
    }) %>%
    dplyr::select(idGKG, everything())
  if (merge_gcam_codes) {
    # Attach human-readable dictionary / dimension names for each GCAM id.
    all_counts <-
      all_counts %>%
      dplyr::left_join(
        get_codes_gcam() %>%
          dplyr::select(
            idGCAM,
            typeDictionary,
            dictionaryHumanName,
            dimensionHumanName
          )
      )
  }
  # Re-key, sort chronologically via the idGKG timestamp prefix, and
  # optionally reshape back to long names.
  all_counts <-
    all_counts %>%
    get_clean_count_data(count_col = 'idArticleGCAM',
                         extra_key = 'articleWordCount',
                         return_wide = return_wide) %>%
    separate(
      idGKG,
      into = c('GKG', 'dateTime'),
      sep = '\\-',
      remove = F
    ) %>%
    mutate(dateTime = dateTime %>% as.numeric) %>%
    arrange(dateTime) %>%
    dplyr::select(-c(dateTime, GKG)) %>%
    suppressWarnings()
  if (!return_wide) {
    all_counts <-
      all_counts %>%
      .resolve_long_names()
  }
  return(all_counts)
}
#' Returns source name or source url from a gkg data frame
#'
#' @param gdelt_data
#' @param source_column options \code{c('sources', 'source', 'urlSources', 'urlSource')}
#' @param filter_na
#' @param return_wide
#' @importFrom urltools domain
#'
#' @return
#' @export
#'
#' @examples
parse_gkg_mentioned_source_data <-
  function(gdelt_data,
           source_column = 'sources',
           filter_na = T,
           return_wide = F) {
    # Parses a GKG source column (';'-separated source names or URLs) into
    # one row per unique source, keyed back to idGKG.
    #
    # Args:
    #   gdelt_data: gkg data frame containing idGKG and the chosen column.
    #   source_column: one of 'sources', 'source', 'urlSources', 'urlSource'.
    #   filter_na: drop rows whose raw source field is NA before parsing.
    #   return_wide: TRUE for one wide row per article, FALSE for long form.
    source_options <-
      c('sources', 'source', 'urlSources', 'urlSource')
    if (!source_column %in% source_options) {
      stop("Sorry source column can only be\n" %>%
             paste0(paste0(source_options, collapse = '\n')))
    }
    # Collapse user-supplied aliases onto the two canonical column names.
    if (source_column %in% c('sources', 'source')) {
      source_column <-
        'sources'
    }
    if (source_column %in% c('urlSources', 'urlSource')) {
      source_column <-
        'urlSources'
    }
    parse_source_name <-
      function(field = "businesstimes.com.sg;businesstimes.com.sg",
               return_wide = F) {
        options(scipen = 99999)
        if (field %>% is.na) {
          # NA field: emit a single placeholder row.
          if (return_wide) {
            field_data <-
              tibble(nameSource = NA)
          } else {
            # BUG FIX: placeholder column was previously misspelled
            # 'idArticle.source1', which created a stray extra column when
            # rows were later bound together.
            field_data <-
              tibble(nameSource = NA,
                     idArticle.source = 1)
          }
        } else {
          # Split and de-duplicate the listed sources.
          fields <-
            field %>%
            str_split('\\;') %>%
            flatten_chr() %>%
            .[!. %in% ''] %>%
            unique
          fields_df <-
            tibble(nameSource = fields) %>%
            dplyr::mutate(idArticle.source = 1:n())
          if (return_wide) {
            # Wide form: item names gain a .<recordIndex> suffix, then spread.
            fields_df <-
              fields_df %>%
              gather(item, value, -idArticle.source) %>%
              arrange(idArticle.source) %>%
              unite(item, item, idArticle.source, sep = '.')
            order_fields <-
              fields_df$item
            field_data <-
              fields_df %>%
              spread(item, value) %>%
              dplyr::select_(.dots = order_fields)
          } else {
            field_data <-
              fields_df
            field_data <-
              field_data %>%
              dplyr::select(idArticle.source,
                            everything())
          }
        }
        return(field_data)
      }
    if (!source_column %in% names(gdelt_data)) {
      stop("Sorry missing source column")
    }
    col_names <-
      c('idGKG', source_column)
    counts_data <-
      gdelt_data %>%
      dplyr::select_(.dots = col_names)
    # Standardize the chosen column's name so the loop below is uniform.
    names(counts_data)[2] <-
      'source_col'
    if (filter_na) {
      counts_data <-
        counts_data %>%
        dplyr::filter(!source_col %>% is.na())
    }
    all_counts <-
      seq_along(counts_data$source_col) %>%
      map_dfr(function(x) {
        parse_source_name(field = counts_data$source_col[x],
                          return_wide = return_wide) %>%
          dplyr::mutate(idGKG = counts_data$idGKG[x])
      })
    # Rename the parsed value column to reflect the requested source type.
    if (source_column == 'urlSources') {
      names(all_counts)[2] <-
        c('urlSource')
    }
    if (source_column == 'sources') {
      names(all_counts)[2] <-
        c('nameSource')
    }
    all_counts <-
      all_counts %>%
      dplyr::select(idGKG, everything())
    if (!return_wide) {
      all_counts <-
        all_counts %>%
        .resolve_long_names()
    }
    return(all_counts)
  }
# data_ac -----------------------------------------------------------------
# Downloads one day's detailed GDELT v2 files for a single table
# ('gkg', 'export' or 'mentions') and binds them into one tibble.
.get_data_gkg_day_detailed <-
  function(date_data = "2016-06-01",
           table_name = "gkg",
           file_directory = NULL,
           folder_name = 'gdelt_data',
           only_most_recent = F,
           remove_files = T,
           empty_trash = T,
           return_message = T) {
    if (only_most_recent) {
      date_data <-
        Sys.Date()
    }
    # Cheap YMD-format sanity check before parsing the date for real.
    if (!date_data %>% substr(5, 5) == "-") {
      stop("Sorry data must be in YMD format, ie, 2016-06-01")
    }
    tables <-
      c('gkg', 'export', 'mentions')
    if (!table_name %in% tables) {
      stop("Sorry tables can only be:\n" %>% paste0(paste0(tables, collapse = '\n')))
    }
    date_data <-
      date_data %>%
      ymd() %>% as.Date()
    if (date_data < "2015-02-18") {
      stop("Sorry data starts on February 18th, 2015")
    }
    if (date_data > Sys.Date()) {
      stop("Sorry data can't go into the future")
    }
    if (only_most_recent) {
      gdelt_detailed_logs <-
        get_urls_gkg_most_recent_log()
      urls <-
        gdelt_detailed_logs %>%
        dplyr::filter(nameFile == table_name) %>%
        .$urlData
    } else {
      # Cache the 15-minute log index globally so repeated calls skip the fetch.
      if (!'gdelt_detailed_logs' %>% exists()) {
        gdelt_detailed_logs <-
          get_urls_gkg_15_minute_log()
        assign(x = 'gdelt_detailed_logs',
               eval(gdelt_detailed_logs),
               envir = .GlobalEnv)
      }
      urls <-
        gdelt_detailed_logs %>%
        dplyr::filter(dateData == date_data) %>%
        dplyr::filter(nameFile == table_name) %>%
        .$urlData
    }
    get_gdelt_url_data_safe <-
      possibly(get_gdelt_url_data, tibble())
    all_data <-
      urls %>%
      map_dfr(function(x) {
        data <-
          get_gdelt_url_data_safe(
            url = x,
            remove_files = remove_files,
            file_directory = file_directory,
            folder_name = folder_name,
            return_message = return_message,
            empty_trash = empty_trash
          )
        # Force feature-id columns to character so bind_rows() across files
        # does not fail on mixed types.
        data %>%
          mutate_at(.vars = data %>% dplyr::select(dplyr::matches('idFeatureActor|idFeatureAction')) %>% names(),
                    .funs = as.character)
      }) %>%
      distinct() %>%
      suppressWarnings()
    # BUG FIX: derive domainSource when a documentSource column is present.
    # The previous guard tested for 'domainSource', a column that is only
    # created here, so the branch could never run.
    if ('documentSource' %in% names(all_data)) {
      all_data <-
        all_data %>%
        mutate(domainSource = documentSource %>% urltools::domain())
    }
    if ('urlSource' %in% names(all_data)) {
      all_data <-
        all_data %>%
        mutate(domainSource = urlSource %>% urltools::domain())
    }
    if (return_message) {
      "You retrieved " %>%
        paste0(all_data %>% nrow, " gkg detailed events for ", date_data) %>%
        cat(fill = T)
    }
    return(all_data)
  }
#' Get dates detailed data from a specified table
#'
#' @param date_data must be a date in Year - Month - Day format
#' @param table_name the name of the table
#' options \code{c('gkg', 'export', 'mentions')}
#' @param file_directory where are the files to be saved
#' @param remove_files Do you want to remove the files
#' \code{T, F}
#' @param empty_trash Do you want to empty the trash
#' \code{T, F}
#' @param return_message Do you want to return a message
#' \code{T, F}
#' @importFrom purrr flatten_chr
#' @importFrom readr parse_number
#' @importFrom purrr compact
#' @import dplyr utils dplyr purrr readr
#' @importFrom urltools domain
#' @importFrom curl curl_download curl
#' @importFrom urltools domain
#' @importFrom purrr map
#' @return
#' @export
#'
#' @examples
get_data_gkg_days_detailed <-
  function(dates = c("2016-07-19"),
           table_name = c("gkg"),
           file_directory = NULL,
           folder_name = 'gdelt_data',
           only_most_recent = F,
           remove_files = T,
           empty_trash = T,
           return_message = T) {
    # Downloads detailed GDELT v2 data for every date x table combination and
    # binds the results; a failed download yields an empty tibble instead of
    # aborting the whole run.
    .get_data_gkg_day_detailed_safe <-
      possibly(.get_data_gkg_day_detailed, tibble())
    # Cartesian product of requested dates and tables.
    var_matrix <-
      expand.grid(date = dates,
                  table_name = table_name,
                  stringsAsFactors = F) %>%
      as_tibble()
    all_data <-
      seq_len(var_matrix %>% nrow()) %>%
      map_dfr(
        function(x)
          .get_data_gkg_day_detailed_safe(
            date_data = var_matrix$date[x],
            table_name = var_matrix$table_name[x],
            only_most_recent = only_most_recent,
            file_directory = file_directory,
            folder_name = folder_name,
            remove_files = remove_files,
            empty_trash = empty_trash,
            return_message = return_message
          )
      ) %>%
      suppressWarnings()
    # Rebuild idGKG so row ids are sequential across the combined result.
    # NOTE(review): the guard tests for 'idDateTime' but the rename reads
    # idDateTimeArticle — confirm which column the upstream schema actually
    # provides; if both are never present together this branch may error.
    if ('idDateTime' %in% names(all_data)) {
      all_data <-
        all_data %>%
        dplyr::rename(idDateTime = idDateTimeArticle) %>%
        dplyr::mutate(idDateTime = 1:n()) %>%
        separate(idGKG, sep = '\\-', c('dateCode', 'remove')) %>%
        unite(idGKG,
              dateCode,
              idDateTime,
              sep = '-',
              remove = F) %>%
        dplyr::select(-c(dateCode, remove))
    }
    return(all_data)
  }
#' Retrieves gkg summary file for a given day
#'
#' @param date_data Date of the data, must be in year, month day form.
#' @param file_directory location of where the file is to be saved
#' @param is_count_file options
#' \code{c(TRUE, FALSE)}
#' @param remove_files
#' \code{c(TRUE, FALSE)}
#' @param empty_trash Empty trash
#' \code{c(TRUE, FALSE)}
#' @param return_message
#' \code{c(TRUE, FALSE)}
#' @importFrom purrr flatten_chr
#' @importFrom readr parse_number
#' @importFrom purrr compact
#' @import dplyr utils dplyr purrr readr
#' @importFrom urltools domain
#' @importFrom curl curl_download curl
#' @return
#'
#' @examples
get_data_gkg_day_summary <- function(date_data = "2016-06-01",
                                     file_directory = NULL,
                                     folder_name = 'gdelt_data',
                                     is_count_file = F,
                                     remove_files = T,
                                     empty_trash = T,
                                     return_message = T) {
  # Downloads and cleans one day's GKG summary (or count) file.
  options(scipen = 99999)
  # Validate the requested date before doing any network work.
  if (!date_data %>% substr(5, 5) == "-") {
    stop("Sorry data must be in YMD format, ie, 2016-06-01")
  }
  date_data <- date_data %>% ymd %>% as.Date()
  if (date_data < "2013-04-01") {
    stop("Sorry data starts on April 1st, 2013")
  }
  if (date_data > Sys.Date()) {
    stop("Sorry data can't go into the future")
  }
  # Cache the summary-log index globally so repeated calls skip the fetch.
  if (!'summary_data_urls' %>% exists()) {
    summary_data_urls <-
      get_urls_gkg_daily_summaries(return_message = return_message)
    assign(x = 'summary_data_urls', eval(summary_data_urls), env = .GlobalEnv)
  }
  # Keep only the requested file flavor (count vs. standard summary).
  summary_data_urls <-
    if (is_count_file) {
      summary_data_urls %>% dplyr::filter(isCountFile == T)
    } else {
      summary_data_urls %>% dplyr::filter(isCountFile == F)
    }
  urls <-
    summary_data_urls %>%
    dplyr::filter(dateData == date_data) %>%
    .$urlData
  # Failed downloads return an empty tibble rather than erroring out.
  download_safe <- possibly(get_gdelt_url_data, tibble())
  all_data <-
    urls %>%
    map_dfr(function(data_url) {
      download_safe(
        url = data_url,
        remove_files = remove_files,
        file_directory = file_directory,
        folder_name = folder_name,
        return_message = return_message,
        empty_trash = empty_trash
      )
    }) %>%
    distinct()
  # Normalize column types that vary across files.
  if ('countObject' %in% names(all_data)) {
    all_data <-
      all_data %>%
      dplyr::mutate(countObject = countObject %>% as.numeric())
  }
  if ('idCAMEOEvents' %in% names(all_data)) {
    all_data <-
      all_data %>%
      mutate(idCAMEOEvents = idCAMEOEvents %>% as.character())
  }
  if (is_count_file == F) {
    all_data <-
      all_data %>%
      dplyr::rename(dateEvent = date)
  }
  # Tag rows with the file flavor they came from.
  all_data <-
    all_data %>%
    mutate(isCountFile = if_else(is_count_file, T, F))
  if (return_message) {
    "You retrieved " %>%
      paste0(all_data %>% nrow(), " gkg summary events for ", date_data) %>%
      cat(fill = T)
  }
  return(all_data)
}
#' Gets days summary GDELT GKG data by table
#'
#' @param dates
#' @param file_directory
#' @param remove_files
#' @param empty_trash
#' @param return_message
#'
#' @return
#' @export
#' @examples
#' get_data_gkg_days_summary(dates = c("2016-07-18"), is_count_file = c(T, F))
get_data_gkg_days_summary <- function(dates = c("2016-06-01"),
                                      is_count_file = c(T, F),
                                      file_directory = NULL,
                                      folder_name = 'gdelt_data',
                                      remove_files = T,
                                      empty_trash = T,
                                      nest_data = F,
                                      return_message = T) {
  # Fetches summary files for every date x file-flavor combination and binds
  # them; a failed day yields an empty tibble instead of aborting the run.
  fetch_day_safe <-
    possibly(get_data_gkg_day_summary, tibble())
  combos <-
    expand.grid(
      date = dates,
      is_count_file = is_count_file,
      stringsAsFactors = F
    ) %>%
    as_tibble %>%
    suppressWarnings()
  all_data <-
    seq_len(combos %>% nrow) %>%
    map_dfr(function(row_idx) {
      fetch_day_safe(
        date_data = combos$date[row_idx],
        is_count_file = combos$is_count_file[row_idx],
        file_directory = file_directory,
        folder_name = folder_name,
        remove_files = remove_files,
        empty_trash = empty_trash,
        return_message = return_message
      ) %>% suppressWarnings()
    }) %>%
    arrange(idGKG) %>%
    suppressWarnings()
  return(all_data)
}
#' Retrieve GDELT data for a given period
#'
#' @param period the GDELT period
#' @param file_directory
#' @param remove_files
#' @param empty_trash
#' @param return_message
#' @importFrom purrr compact
#' @importFrom purrr flatten_chr
#' @importFrom readr parse_number
#' @importFrom purrr compact
#' @import dplyr
#' @import utils
#' @importFrom urltools domain
#' @importFrom curl curl_download curl
#' @return
#'
#' @examples
#' get_data_gdelt_period_event(period = 1983)
get_data_gdelt_period_event <- function(period = 1983,
                                        file_directory = NULL,
                                        folder_name = 'gdelt_data',
                                        remove_files = T,
                                        empty_trash = T,
                                        return_message = T) {
  # Downloads all GDELT event files for a single archive period and binds
  # them into one tibble.
  period <-
    period %>%
    as.character() %>%
    str_replace_all("\\-", '')
  # Cache the event-log index globally so repeated calls skip the fetch.
  if (!'gdelt_event_urls' %>% exists()) {
    gdelt_event_urls <-
      get_urls_gdelt_event_log(return_message = return_message)
    assign(x = 'gdelt_event_urls', eval(gdelt_event_urls), env = .GlobalEnv)
  }
  valid_periods <-
    gdelt_event_urls$periodData
  if (!period %in% valid_periods) {
    stop(
      "Period can only be a 4 digit year between 1979 and 2005\nEXAMPLE: 1983\nA 6 digit year and month from 2006 to March 2013\nEXAMPLE: 201208\nOr an 8 digit year, month, day from March 1, 2013 until today\nEXAMPLE: 20140303"
    )
  }
  urls <-
    gdelt_event_urls %>%
    dplyr::filter(periodData == period) %>%
    .$urlData
  # Failed downloads return an empty tibble rather than erroring out.
  download_safe <-
    possibly(get_gdelt_url_data, tibble())
  all_data <-
    urls %>%
    map_dfr(function(data_url) {
      download_safe(
        url = data_url,
        remove_files = remove_files,
        file_directory = file_directory,
        folder_name = folder_name,
        return_message = return_message,
        empty_trash = empty_trash
      )
    })
  if (return_message) {
    "You retrieved " %>%
      paste0(all_data %>% nrow, " GDELT events for the period of ", period) %>%
      cat(fill = T)
  }
  return(all_data)
}
#' Returns GDELT event data for a given periods
#'
#' @param periods
#' @param file_directory
#' @param is_count_file
#' @param remove_files
#' @param empty_trash
#' @param return_message
#' @import dplyr stringr purrr tidyr readr lubridate
#' @return
#' @export
#'
#' @examples
#' get_data_gdelt_periods_event (periods = c(1983))
get_data_gdelt_periods_event <- function(periods = c(1983, 1989),
                                         file_directory = NULL,
                                         folder_name = 'gdelt_data',
                                         remove_files = T,
                                         empty_trash = T,
                                         return_message = T) {
  # Downloads event data for each requested period; one failed period
  # contributes an empty tibble instead of aborting the whole run.
  download_period_safe <-
    possibly(get_data_gdelt_period_event, tibble())
  # Periods may arrive with dashes (e.g. "2014-03-03"); strip them.
  periods <-
    periods %>%
    str_replace_all('\\-', '')
  all_data <-
    seq_along(periods) %>%
    map_dfr(function(idx) {
      download_period_safe(
        period = periods[idx],
        file_directory = file_directory,
        folder_name = folder_name,
        remove_files = remove_files,
        empty_trash = empty_trash,
        return_message = return_message
      )
    }) %>%
    suppressWarnings()
  return(all_data)
}
#' get_vgkg_schema
#'
#' @return
#' @importFrom dplyr tibble
#'
#' @examples
get_vgkg_schema <- function() {
  # Maps GDELT's raw VGKG column headers to the package's cleaned names.
  gdelt_names <-
    c(
      "DATE",
      "DocumentIdentifier",
      "ImageURL",
      "Labels",
      "GeoLandmarks",
      "Logos",
      "SafeSearch",
      "Faces",
      "OCR",
      "LangHints",
      "WidthHeight",
      "RawJSON"
    )
  cleaned_names <-
    c(
      "dateTimeDocument",
      "documentSource",
      "urlImage",
      "xmlLabels",
      "xmlGeoLandmarks",
      "xmlLogos",
      "xmlSafeSearch",
      "xmlFaces",
      "xmlOCR",
      "codesLanguages",
      "dimWidthHeight",
      "jsonCloudVision"
    )
  tibble(nameGDELT = gdelt_names,
         nameActual = cleaned_names)
}
#' get_urls_vgkg
#'
#' @return
#' @export
#' @importFrom lubridate ymd_hms
#' @importFrom dplyr tibble
#' @examples
get_urls_vgkg <- function() {
  options(scipen = 999999)
  # First VGKG file published 2016-02-22 11:30:00; new files every 15 minutes.
  start_stamp <-
    20160222113000
  now_stamp <-
    Sys.time() %>% format("%Y%m%d%H%M%S") %>% as.numeric()
  period_times <-
    seq(ymd_hms(start_stamp),
        to = now_stamp %>% ymd_hms,
        by = '15 min')
  # Build a download URL for each 15-minute period, newest first.
  tibble(
    dateTimeData = period_times,
    isoPeriod = dateTimeData %>% format("%Y%m%d%H%M%S") %>% as.numeric(),
    urlCloudVisionTags = 'http://data.gdeltproject.org/gdeltv2_cloudvision/' %>% paste0(isoPeriod, '.imagetags.csv.gz'),
    urlTranslationTags = 'http://data.gdeltproject.org/gdeltv2_cloudvision/' %>% paste0(isoPeriod, '.translation.imagetags.csv.gz')
  ) %>%
    mutate(dateData = dateTimeData %>% as.Date()) %>%
    arrange(desc(dateTimeData))
}
#' Gets most recent CV log URLs
#'
#' @return
#' @import stringr
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @importFrom tidyr spread
#'
#' @examples get_urls_vgkg_most_recent()
get_urls_vgkg_most_recent <- function() {
  # The lastupdate file is a single space-separated line:
  # "<period> <hash> <url>".
  raw_log <-
    'http://data.gdeltproject.org/gdeltv2_cloudvision/lastupdate.txt' %>%
    read_tsv(col_names = F) %>%
    suppressMessages()
  names(raw_log) <-
    c('value')
  parts <-
    raw_log$value %>% str_split("\\ ") %>%
    flatten_chr()
  labels <-
    c('idVGKPeriod', 'idVGKHash', 'urlCloudVisionTags')
  # One-row tibble with the period coerced to numeric.
  tibble(item = labels, value = parts) %>%
    spread(item, value) %>%
    mutate(idVGKPeriod = idVGKPeriod %>% as.numeric())
}
.get_data_vgkg_url <-
  function(url = 'http://data.gdeltproject.org/gdeltv2_cloudvision/20160606234500.imagetagsv1.csv.gz',
           remove_json_column = TRUE,
           return_message = TRUE) {
    # Downloads and parses one VGKG (Cloud Vision) 15-minute file.
    is_reachable <-
      url %>% url_ok() %>% suppressWarnings()
    if (!is_reachable) {
      stop("Invalid url")
    }
    vgkg_df <-
      url %>%
      curl() %>%
      gzcon() %>%
      read_tsv(col_names = F) %>%
      suppressWarnings() %>%
      suppressMessages()
    # Raw files ship without headers; apply the package's cleaned names.
    names(vgkg_df) <-
      get_vgkg_schema() %>% pull(nameActual)
    # Build row ids keyed by the file timestamp, localize the timestamp, and
    # coerce the width/height field to numeric.
    vgkg_df <-
      vgkg_df %>%
      dplyr::mutate(
        dateCodeURL = dateTimeDocument,
        idDateTime = 1:n(),
        idVGKG = dateTimeDocument %>% paste0('-', idDateTime),
        dateTimeDocument = dateTimeDocument %>% lubridate::ymd_hms() %>% lubridate::with_tz(Sys.timezone()),
        dateDocument = dateTimeDocument %>% as.Date(),
        dimWidthHeight = dimWidthHeight %>% as.character() %>% parse_number()
      ) %>%
      dplyr::select(idVGKG, idDateTime, everything())
    if (remove_json_column) {
      # The raw Cloud Vision JSON blob is large; drop it unless requested.
      vgkg_df <-
        vgkg_df %>%
        dplyr::select(-jsonCloudVision)
    }
    if (return_message) {
      "Downloaded, parsed and imported " %>%
        paste0(url) %>%
        cat(fill = T)
    }
    return(vgkg_df)
  }
.get_data_vgkg_day <-
  function(date_data = "2016-06-08",
           only_most_recent = F,
           include_translations = F,
           remove_json_column = T,
           return_message = T) {
    # Downloads and row-binds all 15-minute VGKG files for a single day.
    # date_data: day in "YYYY-MM-DD" form; only_most_recent: ignore the
    # date and pull only the latest 15-minute file; include_translations:
    # also fetch the translated image-tag files.
    if (only_most_recent) {
      urls <-
        get_urls_vgkg_most_recent() %>%
        .$urlCloudVisionTags
    } else {
      if (!date_data %>% substr(5, 5) == "-") {
        stop("Sorry data must be in YMD format, ie, 2016-06-01")
      }
      date_data <-
        date_data %>%
        lubridate::ymd() %>%
        as.Date()
      # Cloud Vision tagging began on 2016-02-22.
      if (date_data < "2016-02-22") {
        stop("Sorry data starts on February 22, 2016")
      }
      if (date_data > Sys.Date()) {
        stop("Sorry data can't go into the future")
      }
      # Cache the (slow to build) URL dictionary in the global env so
      # repeated calls within a session skip the rebuild.
      if (!'cv_urls' %>% exists()) {
        cv_urls <-
          get_urls_vgkg()
        # BUG FIX: previously this assigned under the name
        # 'get_urls_vgkg' (clobbering that function in .GlobalEnv) and
        # never populated the 'cv_urls' cache checked above.
        assign(x = 'cv_urls', value = cv_urls, envir = .GlobalEnv)
      }
      urls <-
        cv_urls %>%
        dplyr::filter(dateData == date_data) %>%
        .$urlCloudVisionTags
      if (include_translations) {
        urls <-
          c(urls,
            cv_urls %>%
              dplyr::filter(dateData == date_data) %>%
              .$urlTranslationTags)
      }
    }
    # Individual file failures yield an empty tibble instead of aborting
    # the whole day.
    .get_data_vgkg_url_safe <-
      possibly(.get_data_vgkg_url, tibble())
    all_data <-
      urls %>%
      map_dfr(function(x) {
        .get_data_vgkg_url_safe(
          url = x,
          remove_json_column = remove_json_column,
          return_message = return_message
        )
      }) %>%
      distinct() %>%
      dplyr::select(idVGKG, idDateTime, dateTimeDocument, everything()) %>%
      suppressWarnings()
    if (return_message) {
      "You retrieved " %>%
        paste0(all_data %>% nrow,
               " cloud vision processed items for ",
               date_data) %>%
        cat(fill = T)
    }
    return(all_data)
  }
#' Gets CV Data for stated periods
#'
#' @param date_data
#' @param include_translations
#' @param file_directory
#' @param remove_files
#' @param empty_trash
#' @param return_message
#' @importFrom urltools domain
#' @importFrom httr url_ok
#' @import stringr dplyr purrr curl lubridate tidyr
#' @importFrom purrr flatten_chr
#' @importFrom curl curl_download
#' @importFrom readr read_tsv
#' @importFrom lubridate ymd_hms with_tz ymd
#' @return
#' @export
#'
#' @examples
get_data_vgkg_dates <-
function(dates = c("2016-06-09", "2016-06-08"),
include_translations = FALSE,
only_most_recent = FALSE,
remove_json_column = TRUE,
return_message = TRUE) {
# Retrieves VGKG (Cloud Vision) data for each requested day, stacks the
# results, renumbers idVGKG within each document date, and adds the
# source domain column.
if (only_most_recent) {
dates <-
Sys.Date()
}
# Per-day failures produce an empty tibble rather than aborting.
.get_data_vgkg_day_safe <-
possibly(.get_data_vgkg_day, tibble())
all_data <-
dates %>%
map_dfr(
function(x)
.get_data_vgkg_day_safe(
date_data = x,
only_most_recent = only_most_recent,
remove_json_column = remove_json_column,
return_message = return_message
)
)
# Rebuild idVGKG as "<period>-<n>" with n sequential within each
# dateDocument, then drop the intermediate pieces.
all_data <-
all_data %>%
separate(idVGKG, into = c('VGKG', 'remove'), '\\-') %>%
group_by(dateDocument) %>%
mutate(count = 1:n()) %>%
mutate(idVGKG = VGKG %>% paste0('-', count)) %>%
dplyr::select(-c(count, idDateTime, remove, VGKG)) %>%
dplyr::select(idVGKG, everything()) %>%
ungroup()
# Derive the bare domain (e.g. "nytimes.com") from the article URL.
all_data <-
all_data %>%
mutate(domainSource = documentSource %>% urltools::domain()) %>%
dplyr::select(idVGKG:dateTimeDocument, domainSource, everything())
return(all_data)
}
.parse_xml_extra <-
function(x = "<PAGE_LINKS>http://therealdeal.com/2015/05/07/hfz-secures-1b-in-financing-for-high-line-site/;http://therealdeal.com/2015/09/10/uk-hedge-fund-loaning-850m-for-relateds-hudson-yards-resi-tower/;http://therealdeal.com/2016/02/05/hfz-seeks-250m-from-eb-5-investors-for-high-line-condos/;http://therealdeal.com/2016/07/28/macklowe-seeking-1b-loan-for-1-wall-street-conversion/;http://therealdeal.com/2016/08/18/first-look-floor-plans-at-hfzs-high-line-development/;http://therealdeal.com/2016/10/05/inside-gary-barnetts-game-of-real-estate-tetris/;http://therealdeal.com/2016/10/06/six-senses-to-open-hotel-at-hfzs-high-line-project/;http://therealdeal.com/issues_articles/whos-bankrolling-the-boom/;http://therealdeal.com/issues_articles/ziel-feldman-it-was-worth-every-penny/</PAGE_LINKS><PAGE_PRECISEPUBTIMESTAMP>20161025180000</PAGE_PRECISEPUBTIMESTAMP>") {
# Parses one GKG xmlExtras fragment into a long tibble of
# (idXMLItem, item, value). The fragment is wrapped in a synthetic
# <item> root so it becomes well-formed XML; an unparseable fragment
# yields an empty tibble.
safe_xml <-
possibly(read_xml, otherwise = NULL)
gdelt_xml <-
list("<item>", x, "</item>") %>%
invoke(paste0, .) %>%
safe_xml()
if (gdelt_xml %>% length > 0) {
# Child tag names become `item`, child text becomes `value`.
# NOTE(review): html_text() presumably comes from rvest — confirm it
# is in scope; xml2::xml_text() would be the matching xml2 call.
values <-
gdelt_xml %>%
xml_children() %>%
html_text()
items <-
gdelt_xml %>%
xml_children() %>%
xml_name()
xml_df <-
tibble(item = items, value = values)
# Semicolon-delimited values (e.g. PAGE_LINKS) are exploded into one
# row per element, numbered by idXMLItem within each item name.
xml_df <-
xml_df$item %>%
unique %>%
map_dfr(function(x) {
item_value <-
xml_df %>%
filter(item == x) %>%
.$value %>%
str_split('\\;') %>%
flatten_chr()
tibble(item = x, value = item_value) %>%
group_by(item) %>%
mutate(idXMLItem = seq_along(item)) %>%
ungroup() %>%
dplyr::select(idXMLItem, item, value)
})
} else {
xml_df <-
tibble()
}
return(xml_df)
}
#' Parse XML Extra
#'
#' @param data
#' @param return_wide
#'
#' @return
#' @import xml2 purrr dplyr tidyr
#' @return
#' @export
#'
#' @examples
parse_xml_extras <-
  function(data, return_wide = FALSE) {
    # Expands the xmlExtras column of a GKG data frame into a long
    # (idGKG, idXMLItem, item, value) table, or a wide one-row-per-idGKG
    # table when return_wide = TRUE.
    xml_data <-
      data %>%
      filter(!xmlExtras %>% is.na()) %>%
      select(idGKG, xmlExtras)
    parse_xml_extra_safe <-
      possibly(.parse_xml_extra, NULL)
    xml_extra_df <-
      1:nrow(xml_data) %>%
      map_dfr(function(x) {
        row_data <-
          xml_data %>%
          slice(x)
        # FIX: parse each row once and reuse the result. The previous
        # version parsed every row twice (once for the has_data test,
        # once for the kept value) and crashed on a NULL parse result
        # because `nrow(NULL) > 0` is logical(0) inside if().
        xml_df <-
          parse_xml_extra_safe(row_data$xmlExtras)
        if (!is.null(xml_df) && nrow(xml_df) > 0) {
          xml_df <-
            xml_df %>%
            mutate(idGKG = row_data$idGKG) %>%
            dplyr::select(idGKG, everything())
          return(xml_df)
        }
      })
    xml_extra_df <-
      xml_extra_df %>%
      dplyr::filter(!value %>% is.na()) %>%
      suppressWarnings()
    if (return_wide) {
      # Zero-index repeated items so the first instance keeps the bare
      # name (item, item1, item2, ...) before spreading wide.
      xml_extra_df <-
        xml_extra_df %>%
        mutate(idXMLItem = idXMLItem - 1,
               item = ifelse(idXMLItem > 0, paste0(item, idXMLItem), item)) %>%
        dplyr::select(-idXMLItem) %>%
        spread(item, value)
    }
    return(xml_extra_df)
  }
#' Parse XML Labels
#'
#' @param data
#' @param id_dateTime
#'
#' @return
#' @import dplyr
#' @import tidyr
#'
#' @examples
get_clean_count_vkg_data <-
function(all_counts,
extra_key = NA,
count_col = 'idImageLabel',
return_wide = T) {
# Normalizes the per-image count tables produced by the parse_vgkg_*
# helpers: renames the per-image counter column (count_col) to `count`,
# zero-indexes it within each idVGKG, suffixes repeated item names
# (item, item1, item2, ...), and orders rows by the numeric dateTime
# half of idVGKG. extra_key optionally carries one extra column
# (renamed `ek` internally and restored at the end).
if (!extra_key %>% is.na()) {
clean_data <-
all_counts %>%
dplyr::rename_(count = count_col,
ek = extra_key) %>%
group_by(idVGKG) %>%
mutate(count = 1:n()) %>%
ungroup %>%
mutate(count = count - 1) %>%
gather(item, value, -c(idVGKG, count, ek), na.rm = T) %>%
mutate(item = ifelse(count == 0, item, item %>% paste0(count)),
value = value %>% str_trim) %>%
separate(
idVGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime, count) %>%
dplyr::select(-c(dateTime, GKG))
} else {
# Same pipeline without the extra-key column.
clean_data <-
all_counts %>%
dplyr::rename_(count = count_col) %>%
group_by(idVGKG) %>%
mutate(count = 1:n()) %>%
ungroup %>%
mutate(count = count - 1) %>%
gather(item, value, -c(idVGKG, count), na.rm = T) %>%
mutate(item = ifelse(count == 0, item, item %>% paste0(count)),
value = value %>% str_trim) %>%
separate(
idVGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime, count) %>%
dplyr::select(-c(dateTime, GKG))
}
if (return_wide) {
if (!extra_key %>% is.na()) {
names_order <-
c('idVGKG', 'ek', clean_data$item %>% unique)
# NOTE(review): unlike the branch below, this path never calls
# spread(item, value), yet names_order lists the item names as
# columns for the select_() later — looks like a missing
# spread(); confirm (all callers in this file use extra_key = NA,
# so this branch appears unexercised).
clean_data <-
clean_data %>%
dplyr::select(-count)
} else {
names_order <-
c('idVGKG', clean_data$item %>% unique)
clean_data <-
clean_data %>%
dplyr::select(-count) %>%
spread(item, value)
}
# Column-name keywords that indicate numeric content.
keywords <-
c(
'amountValue',
'latitude',
'scoreGoldstein',
'articleWordCount',
'longitude',
'countEvent',
'idTypeLocation',
'charLoc',
'length',
'count',
'month',
'day',
'year',
'score'
)
# Coerce keyword-matched columns (minus name/country columns) to
# numeric, only if any exist.
length_nums <-
clean_data %>%
dplyr::select(dplyr::matches(keywords %>% paste0(collapse = '|'))) %>%
dplyr::select(-dplyr::matches('namePerson|idCountry')) %>%
names %>% length
if (length_nums > 0) {
numeric_vars <-
clean_data %>%
dplyr::select(dplyr::matches(keywords %>% paste0(collapse = '|'))) %>%
dplyr::select(-dplyr::matches('namePerson|idCountry')) %>%
names
clean_data <-
clean_data %>%
mutate_at(numeric_vars,
funs(as.numeric))
}
# Reorder columns, then sort rows by the timestamp half of idVGKG.
clean_data <-
clean_data %>%
dplyr::select_(.dots = names_order) %>%
separate(
idVGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime) %>%
dplyr::select(-c(GKG, dateTime))
# Restore the caller's extra-key column name.
if (!extra_key %>% is.na()) {
names(clean_data)[2] <-
extra_key
}
} else {
# Long output: just drop the counter and sort by timestamp.
clean_data <-
clean_data %>%
dplyr::select(-count) %>%
separate(
idVGKG,
into = c('GKG', 'dateTime'),
sep = '\\-',
remove = F
) %>%
mutate(dateTime = dateTime %>% as.numeric) %>%
arrange(dateTime) %>%
dplyr::select(-c(GKG, dateTime))
}
return(clean_data)
}
parse_xml_labels <-
  function(data, id_vgkg = 1) {
    # Parses the <RECORD>/<FIELD>-delimited label annotations for one
    # VGKG id into a tidy tibble.
    raw_labels <-
      data %>%
      dplyr::filter(idVGKG == id_vgkg) %>%
      .$xmlLabels
    if (is.na(raw_labels)) {
      # No labels for this image: return a stub row so binds still work.
      return(tibble(idVGKG = id_vgkg))
    }
    # Multiple labels are delimited by <RECORD>.
    if (str_detect(raw_labels, '<RECORD>')) {
      raw_labels <-
        raw_labels %>%
        str_split('<RECORD>') %>%
        flatten_chr()
    }
    label_df <-
      tibble(raw_labels) %>%
      separate(
        raw_labels,
        into = c('nameLabel', 'scoreConfidenceLabel', 'midGoogle'),
        sep = '<FIELD>'
      ) %>%
      mutate(
        idVGKG = id_vgkg,
        idImageLabel = 1:n(),
        scoreConfidenceLabel = as.numeric(scoreConfidenceLabel)
      ) %>%
      dplyr::select(idVGKG, idImageLabel, everything()) %>%
      suppressWarnings()
    label_df
  }
#' Parses Cloud Vision Labels
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#' @import purrr
#' @examples
parse_vgkg_labels <- function(gdelt_data,
                              filter_na = T,
                              return_wide = T) {
  # Parses Cloud Vision labels for every image in gdelt_data; failures
  # on individual ids yield an empty tibble rather than aborting.
  safe_parser <-
    possibly(parse_xml_labels, tibble())
  label_data <-
    suppressWarnings(
      map_dfr(gdelt_data$idVGKG, function(id) {
        safe_parser(data = gdelt_data, id_vgkg = id)
      })
    )
  label_data <-
    get_clean_count_vkg_data(label_data,
                             count_col = 'idImageLabel',
                             return_wide = return_wide)
  if (!return_wide) {
    label_data <- .resolve_long_names(label_data)
  }
  label_data
}
#' Parses XML Landmarks
#'
#' @param data
#' @param id_vgkg
#'
#' @return
#'
#' @examples
parse_xml_landmarks <-
function(data, id_vgkg = "20160610194500-2247") {
# Parses the <RECORD>/<FIELD>-delimited geo-landmark annotations for a
# single VGKG id, splitting the "lat,lon" field into numeric columns.
# Returns a stub row (idVGKG only) when the image has no landmarks.
xmlData <-
data %>%
dplyr::filter(idVGKG == id_vgkg) %>%
.$xmlGeoLandmarks
if (xmlData %>% is.na()) {
xml_df <-
tibble(idVGKG = id_vgkg)
} else {
# Multiple landmarks are delimited by <RECORD>.
if (xmlData %>% str_detect('<RECORD>')) {
xmlData <-
xmlData %>%
str_split('<RECORD>') %>%
flatten_chr()
}
xml_df <-
tibble(xmlData) %>%
separate(
xmlData,
into = c(
'nameLandmark',
'scoreConfidenceLandmark',
'midGoogle',
'latLonLandmark'
),
sep = '<FIELD>'
) %>%
# latLonLandmark arrives as "lat,lon".
separate(
col = 'latLonLandmark',
sep = '\\,',
into = c('latitudeLandmark', 'longitudeLandmark')
) %>%
mutate(
idVGKG = id_vgkg,
idLandmarkImage = 1:n(),
scoreConfidenceLandmark = scoreConfidenceLandmark %>% as.numeric(),
latitudeLandmark = latitudeLandmark %>% as.numeric,
longitudeLandmark = longitudeLandmark %>% as.numeric
) %>%
dplyr::select(idVGKG, idLandmarkImage, everything()) %>%
suppressWarnings()
}
return(xml_df)
}
#' Parses Cloud Vision Landmark
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_vgkg_landmarks <- function(gdelt_data,
                                 filter_na = T,
                                 return_wide = T) {
  # Parses Cloud Vision geo-landmarks for every image in gdelt_data.
  safe_parser <-
    possibly(parse_xml_landmarks, tibble())
  landmark_data <-
    suppressWarnings(
      map_dfr(gdelt_data$idVGKG, function(id) {
        safe_parser(data = gdelt_data, id_vgkg = id)
      })
    )
  # Drop the stub rows produced for images without landmarks.
  if (filter_na) {
    landmark_data <-
      dplyr::filter(landmark_data, !is.na(nameLandmark))
  }
  landmark_data <-
    get_clean_count_vkg_data(landmark_data,
                             count_col = 'idLandmarkImage',
                             return_wide = return_wide)
  if (!return_wide) {
    landmark_data <- .resolve_long_names(landmark_data)
  }
  landmark_data
}
#' Parses XML Logo
#'
#' @param data
#' @param id_vgkg
#'
#' @return
#'
#' @examples
parse_xml_logos <-
  function(data, id_vgkg = 1) {
    # Parses the logo annotations for one VGKG id into a tidy tibble.
    raw_logos <-
      data %>%
      dplyr::filter(idVGKG == id_vgkg) %>%
      .$xmlLogos
    if (is.na(raw_logos)) {
      # No logos for this image: return a stub row so binds still work.
      return(tibble(idVGKG = id_vgkg))
    }
    # Multiple logos are delimited by <RECORD>, fields by <FIELD>.
    raw_logos <-
      flatten_chr(str_split(raw_logos, '<RECORD>'))
    logo_df <-
      tibble(raw_logos) %>%
      separate(
        raw_logos,
        into = c('nameLogo', 'scoreConfidenceLogo', 'midGoogle'),
        sep = '<FIELD>'
      ) %>%
      mutate(
        idVGKG = id_vgkg,
        idLogoImage = 1:n(),
        scoreConfidenceLogo = as.numeric(scoreConfidenceLogo)
      ) %>%
      dplyr::select(idVGKG, idLogoImage, everything()) %>%
      suppressWarnings()
    logo_df
  }
#' Parses Cloud Vision Logos
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_vgkg_logos <- function(gdelt_data,
                             filter_na = T,
                             return_wide = T) {
  # Parses Cloud Vision logo detections for every image in gdelt_data.
  safe_parser <-
    possibly(parse_xml_logos, tibble())
  logo_data <-
    suppressWarnings(
      map_dfr(gdelt_data$idVGKG, function(id) {
        safe_parser(data = gdelt_data, id_vgkg = id)
      })
    )
  # Empty Google MIDs arrive as "" — normalize them to NA.
  logo_data <-
    mutate(logo_data, midGoogle = ifelse(midGoogle == '', NA, midGoogle))
  if (filter_na) {
    logo_data <-
      dplyr::filter(logo_data, !is.na(nameLogo))
  }
  logo_data <-
    get_clean_count_vkg_data(logo_data,
                             count_col = 'idLogoImage',
                             return_wide = return_wide)
  if (!return_wide) {
    logo_data <-
      suppressWarnings(.resolve_long_names(logo_data))
  }
  logo_data
}
#' Parses Safe Search
#'
#' @param data
#' @param id_vgkg
#'
#' @return
#'
#' @examples
parse_xml_safe_search <-
function(data, id_vgkg = 1) {
# Parses the SafeSearch likelihood scores for one VGKG id.
# Returns a stub row (idVGKG only) when the field is NA.
xmlData <-
data %>%
dplyr::filter(idVGKG == id_vgkg) %>%
.$xmlSafeSearch
if (xmlData %>% is.na()) {
xml_df <-
tibble(idVGKG = id_vgkg)
} else {
xmlData <-
xmlData %>%
str_split('<RECORD>') %>%
flatten_chr()
# Four <FIELD>-delimited likelihood values per record.
xml_df <-
tibble(xmlData) %>%
separate(
xmlData,
into = c(
'scoreViolenceLikelihood',
'scoreMedicalLikelihood',
'scoreSpoofLikelihood',
'scoreAdultLikelihood'
),
sep = '<FIELD>'
)
# NOTE(review): as.integer assumes the likelihood fields are numeric
# codes; non-numeric values would silently become NA (warnings are
# suppressed below). funs() is soft-deprecated in newer dplyr.
xml_df <-
xml_df %>%
mutate_at(xml_df %>% dplyr::select(dplyr::matches("score")) %>% names(),
funs(as.integer)) %>%
mutate(idVGKG = id_vgkg,
idSafeSearchImage = 1:n()) %>%
dplyr::select(idVGKG, idSafeSearchImage, everything()) %>%
suppressWarnings() %>%
distinct()
}
return(xml_df)
}
#' Parses Safe Search
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_vgkg_safe_search <- function(gdelt_data,
                                   filter_na = T,
                                   return_wide = T) {
  # Parses SafeSearch likelihoods for every image in gdelt_data.
  safe_parser <-
    possibly(parse_xml_safe_search, tibble())
  ss_data <-
    suppressWarnings(
      map_dfr(gdelt_data$idVGKG, function(id) {
        safe_parser(data = gdelt_data, id_vgkg = id)
      })
    )
  # Drop the stub rows for images with no SafeSearch record.
  if (filter_na) {
    ss_data <-
      dplyr::filter(ss_data, !is.na(idSafeSearchImage))
  }
  # SafeSearch is one row per image, so no count-table cleaning here.
  if (!return_wide) {
    ss_data <- .resolve_long_names(ss_data)
  }
  ss_data
}
#' Parses XML Faces
#'
#' @param data
#' @param id_vgkg
#'
#' @return
#'
#' @examples
parse_xml_faces <-
function(data, id_vgkg = 1) {
# Parses the face-detection annotations for one VGKG id: detection /
# landmarking confidences, head angles, emotion likelihoods, and the
# four-corner bounding box (split into faceX1..faceY4).
# Returns a stub row (idVGKG only) when the field is NA.
xmlData <-
data %>%
dplyr::filter(idVGKG == id_vgkg) %>%
.$xmlFaces
if (xmlData %>% is.na()) {
xml_df <-
tibble(idVGKG = id_vgkg)
} else {
# One face per <RECORD>; fields are <FIELD>-delimited.
xmlData <-
xmlData %>%
str_split('<RECORD>') %>%
flatten_chr()
xml_df <-
tibble(xmlData) %>%
separate(
xmlData,
into = c(
'scoreDetectionConfidence',
'angleRoll',
'anglePan',
'angleTilt',
'scoreLandmarkingConfidence',
'boxBounding',
'scoreEmotionSorrowLikelihood',
'scoreEmotionAngerLikelihood',
'scoreHeadwearLikelihood',
'scoreEmotionJoyLikelihood',
'scoreEmotionSurpriseLikelihood',
'scoreUnderExposedLikelihood',
'scoreBlurredLikelihood'
),
sep = '<FIELD>'
) %>%
# boxBounding is "x1,y1;x2,y2;x3,y3;x4,y4" — split corners, then
# split each corner into its x / y component.
separate('boxBounding', sep = ';', c('xy1', 'xy2', 'xy3', 'xy4')) %>%
separate('xy1', sep = ',', c('faceX1', 'faceY1')) %>%
separate('xy2', sep = ',', c('faceX2', 'faceY2')) %>%
separate('xy3', sep = ',', c('faceX3', 'faceY3')) %>%
separate('xy4', sep = ',', c('faceX4', 'faceY4'))
# Coerce all score / face-coordinate / angle columns to numeric.
xml_df <-
xml_df %>%
mutate_at(.vars =
xml_df %>% dplyr::select(dplyr::matches("score|face|angle")) %>% names(),
.funs = as.numeric) %>%
mutate(idVGKG = id_vgkg,
idFace = 1:n()) %>%
dplyr::select(idVGKG, idFace, everything()) %>%
suppressWarnings() %>%
distinct()
}
return(xml_df)
}
#' Parses Cloud Vision Faces
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_vgkg_faces <- function(gdelt_data,
                             filter_na = T,
                             return_wide = T) {
  # Parses Cloud Vision face detections for every image in gdelt_data.
  safe_parser <-
    possibly(parse_xml_faces, tibble())
  face_data <-
    suppressWarnings(
      map_dfr(gdelt_data$idVGKG, function(id) {
        safe_parser(data = gdelt_data, id_vgkg = id)
      })
    )
  # Drop the stub rows for images with no detected faces.
  if (filter_na) {
    face_data <-
      dplyr::filter(face_data, !is.na(scoreDetectionConfidence))
  }
  face_data <-
    get_clean_count_vkg_data(face_data,
                             count_col = 'idFace',
                             return_wide = return_wide)
  if (!return_wide) {
    face_data <- .resolve_long_names(face_data)
  }
  face_data
}
#' Parses XML OCR
#'
#' @param data
#' @param id_vgkg
#'
#' @return
#'
#' @examples
parse_xml_ocr <-
  function(data, id_vgkg = 1) {
    # Parses the OCR text extracted from one VGKG image.
    raw_ocr <-
      data %>%
      dplyr::filter(idVGKG == id_vgkg) %>%
      .$xmlOCR
    if (is.na(raw_ocr)) {
      # No OCR text: return a stub row so binds still work.
      return(tibble(idVGKG = id_vgkg))
    }
    # Split multi-record blobs and strip embedded newline escapes.
    raw_ocr <-
      raw_ocr %>%
      str_split('<RECORD>') %>%
      flatten_chr() %>%
      gsub('\\n', '', .)
    ocr_df <-
      tibble(textOCR = raw_ocr) %>%
      mutate(idVGKG = id_vgkg,
             idItemOCR = 1:n()) %>%
      dplyr::select(idVGKG, idItemOCR, everything()) %>%
      suppressWarnings() %>%
      distinct()
    ocr_df
  }
#' Parses Cloud Vision OCR
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_vgkg_ocr <- function(gdelt_data,
                           filter_na = T,
                           return_wide = T) {
  # Parses OCR text for every image in gdelt_data.
  safe_parser <-
    possibly(parse_xml_ocr, tibble())
  ocr_data <-
    suppressWarnings(
      map_dfr(gdelt_data$idVGKG, function(id) {
        safe_parser(data = gdelt_data, id_vgkg = id)
      })
    )
  # Drop the stub rows for images with no OCR text.
  if (filter_na) {
    ocr_data <-
      dplyr::filter(ocr_data, !is.na(idItemOCR))
  }
  ocr_data <-
    get_clean_count_vkg_data(ocr_data,
                             count_col = 'idItemOCR',
                             return_wide = return_wide)
  if (!return_wide) {
    ocr_data <- .resolve_long_names(ocr_data)
  }
  ocr_data
}
#' Parses XML Language Type
#'
#' @param data
#' @param id_vgkg
#'
#' @return
#'
#' @examples
parse_language_types <-
  function(data, id_vgkg = 1) {
    # Parses the comma-separated language-hint codes for one VGKG id.
    raw_codes <-
      data %>%
      dplyr::filter(idVGKG == id_vgkg) %>%
      .$codesLanguages
    if (is.na(raw_codes)) {
      # No language hints: return a stub row so binds still work.
      return(tibble(idVGKG = id_vgkg))
    }
    codes <-
      flatten_chr(str_split(raw_codes, '\\,'))
    lang_df <-
      tibble(idLanguage = codes) %>%
      mutate(idVGKG = id_vgkg,
             idItemLanguage = 1:n()) %>%
      dplyr::select(idVGKG, idItemLanguage, everything()) %>%
      suppressWarnings()
    lang_df
  }
#' Parses Cloud Vision Languages
#'
#' @param gdelt_data
#' @param filter_na
#' @param return_wide
#'
#' @return
#' @export
#'
#' @examples
parse_vgkg_languages <- function(gdelt_data,
                                 filter_na = T,
                                 return_wide = T) {
  # Parses language hints for every image in gdelt_data.
  safe_parser <-
    possibly(parse_language_types, tibble())
  lang_data <-
    suppressWarnings(
      map_dfr(gdelt_data$idVGKG, function(id) {
        safe_parser(data = gdelt_data, id_vgkg = id)
      })
    )
  # Drop the stub rows for images without language hints.
  if (filter_na) {
    lang_data <-
      dplyr::filter(lang_data, !is.na(idItemLanguage))
  }
  lang_data <-
    get_clean_count_vkg_data(lang_data,
                             count_col = 'idItemLanguage',
                             return_wide = return_wide)
  if (!return_wide) {
    lang_data <- .resolve_long_names(lang_data)
  }
  lang_data
}
#' Gets most recent GKG TV log URLs
#'
#' @return
#' @import stringr
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @importFrom tidyr separate
#' @export
#'
#' @examples get_urls_gkgtv_most_recent_log()
get_urls_gkgtv_most_recent_log <- function() {
# Reads GDELT's TV-GKG lastupdate file and returns a one-row tibble of
# (dateData, idHash, urlData) for the most recent daily file.
urlData <-
'http://data.gdeltproject.org/gdeltv2_iatelevision/lastupdate.txt' %>%
read_tsv(col_names = F) %>%
dplyr::select(-1) %>%
suppressMessages()
names(urlData) <-
c('idHash', 'urlData')
# NOTE: inside mutate(), `urlData` refers to the column, which shadows
# the identically named data frame — the date is stripped out of the
# file URL ("<prefix>YYYYMMDD.gkg.csv.gz").
urlData <-
urlData %>%
mutate(
dateData = urlData %>% str_replace_all(
'http://data.gdeltproject.org/gdeltv2_iatelevision/|.gkg.csv.gz',
''
) %>% as.Date('%Y%m%d')
) %>%
dplyr::select(dateData, everything())
return(urlData)
}
#' Retrieves GKG TV Schema
#'
#' @return
#' @importFrom dplyr tibble
#' @examples
#' get_tv_schema
get_tv_schema <- function() {
  # Mapping from GDELT's raw TV-GKG column headers to the cleaned
  # column names used throughout this package.
  gdelt_names <- c(
    "GKGRECORDID", "DATE", "SourceCollectionIdentifier", "SourceCommonName",
    "DocumentIdentifier", "Counts", "V2Counts", "Themes", "V2Themes",
    "Locations", "V2Locations", "Persons", "V2Persons", "Organizations",
    "V2Organizations", "V2Tone", "Dates", "GCAM", "SharingImage",
    "RelatedImages", "SocialImageEmbeds", "SocialVideoEmbeds", "Quotations",
    "AllNames", "Amounts", "TranslationInfo", "Extras"
  )
  actual_names <- c(
    "idGKG", "dateDocument", "idSourceCollectionIdentifier", "nameSource",
    "documentSource", "counts", "countsCharLoc", "themes", "themesCharLoc",
    "locations", "locationsCharLoc", "persons", "personsCharLoc",
    "organizations", "organizationsCharLoc", "tone", "dates", "gcam",
    "urlImage", "urlImageRelated", "urlSocialMediaImageEmbeds",
    "urlSocialMediaVideoEmbeds", "quotations", "mentionedNamesCounts",
    "mentionedNumericsCounts", "translationInfo", "xmlExtras"
  )
  tibble(nameGDELT = gdelt_names, nameActual = actual_names)
}
#' Gets Global Knowledge Graph TV Summary Files
#'
#' @param remove_count_files
#' @param return_message
#'
#' @return
#' @export
#' @import stringr
#' @import dplyr
#' @importFrom magrittr %>%
#' @importFrom readr read_tsv
#' @importFrom lubridate ymd
#' @importFrom tidyr separate
#' @examples
#' dictionary_gkg_tv_daily_summaries(return_message = T)
dictionary_gkg_tv_daily_summaries <-
function(return_message = T) {
# Downloads the TV-GKG master file list and returns a tibble of
# (dateData, idHash, urlData) — one row per daily archive file.
url <-
'http://data.gdeltproject.org/gdeltv2_iatelevision/masterfilelist.txt'
urlData <-
url %>%
read_tsv(col_names = F) %>%
dplyr::select(-1) %>%
suppressMessages()
names(urlData) <-
c('idHash', 'urlData')
# NOTE: inside mutate(), `urlData` refers to the column (shadowing the
# data frame of the same name); the date is extracted from the URL.
urlData <-
urlData %>%
mutate(
dateData = urlData %>% str_replace_all(
'http://data.gdeltproject.org/gdeltv2_iatelevision/|.gkg.csv.gz',
''
) %>% as.Date('%Y%m%d')
) %>%
dplyr::select(dateData, everything())
if (return_message) {
count.files <-
urlData %>%
nrow()
min.date <-
urlData$dateData %>% min(na.rm = T)
max.date <-
urlData$dateData %>% max(na.rm = T)
"You got " %>%
paste0(count.files,
' GDELT TV Global Knowledge Graph URLS from ',
min.date,
' to ',
max.date) %>%
cat(fill = T)
}
return(urlData)
}
.get_data_gkg_tv <-
function(url = 'http://data.gdeltproject.org/gdeltv2_iatelevision/20160609.gkg.csv.gz',
return_message = T) {
# Downloads one daily TV-GKG file, names its columns from
# get_tv_schema(), and derives network/show metadata plus the Internet
# Archive video URL from the document identifier.
ok_url <-
url %>% url_ok() %>% suppressWarnings()
if (ok_url == FALSE) {
stop("Invalid url")
}
gkg_tv_data <-
url %>%
curl() %>%
gzcon() %>%
read_tsv(col_names = F) %>%
suppressWarnings() %>%
suppressMessages()
names(gkg_tv_data) <-
get_tv_schema() %>% .$nameActual
gkg_tv_data <-
gkg_tv_data %>%
dplyr::mutate(
idSourceCollectionIdentifier = idSourceCollectionIdentifier %>% as.numeric(),
isDocumentURL = ifelse(documentSource %>% str_detect('http'), T, F)
) %>%
dplyr::select(idGKG:idSourceCollectionIdentifier,
isDocumentURL,
everything()) %>%
dplyr::rename(dateTimeDocument = dateDocument) %>%
dplyr::mutate(dateTimeDocument = dateTimeDocument %>% lubridate::ymd_hms() %>% with_tz(Sys.timezone())) %>%
suppressMessages() %>%
suppressWarnings()
# The identifier looks like "NETWORK_YYYYMMDD_HHMMSS_Show_Name"; the
# three sub() calls convert only the FIRST three underscores to
# hyphens so separate() splits network/date/time while keeping the
# remaining underscores inside the show name.
gkg_tv_data <-
gkg_tv_data %>%
mutate(docDetails = documentSource %>% sub('\\_', '\\-', .) %>% sub('\\_', '\\-', .) %>% sub('\\_', '\\-', .)) %>%
separate(
docDetails,
sep = '\\-',
into = c('idTVNetwork', 'date', 'time', 'nameTVShow')
) %>%
dplyr::select(-c(date, time)) %>%
mutate(
nameTVShow = nameTVShow %>% str_replace_all('\\_', ' '),
urlArchiveVideo = documentSource %>% paste0('https://archive.org/details/', .)
) %>%
dplyr::select(idGKG:documentSource,
urlArchiveVideo,
idTVNetwork,
nameTVShow,
everything())
if (return_message) {
"Downloaded, parsed and imported " %>%
paste0(url) %>%
cat(fill = T)
}
return(gkg_tv_data)
}
.get_data_gkg_tv_day <- function(date_data = "2016-06-01",
                                 only_most_recent = F,
                                 return_message = T) {
  # Downloads and binds all GKG TV files for one day.
  # date_data: day in "YYYY-MM-DD" form; only_most_recent: use the
  # latest available day instead (the feed runs on a two-day lag).
  if (only_most_recent) {
    # FIX: keep this as character — substr() below errors on a Date
    # object ("non-character object"), which previously made the
    # only_most_recent path fail (silently, when wrapped in possibly()).
    date_data <-
      as.character(Sys.Date() - 2)
  }
  if (!date_data %>% substr(5, 5) == "-") {
    stop("Sorry data must be in YMD format, ie, 2016-06-01")
  }
  date_data <-
    date_data %>%
    ymd %>% as.Date()
  # Internet Archive TV coverage begins 2009-06-04.
  if (date_data < "2009-06-04") {
    stop("Sorry data starts on June 4th, 2009")
  }
  if (date_data > Sys.Date() - 2) {
    stop("Sorry data can't go into the future is on a 2 day lag")
  }
  if (only_most_recent) {
    gkg_recent <-
      get_urls_gkgtv_most_recent_log()
    urls <-
      gkg_recent %>%
      .$urlData
  } else {
    # Cache the (slow to download) master URL dictionary in the global
    # env so repeated calls within a session skip the rebuild.
    if (!'gkg_tv_urls' %>% exists()) {
      gkg_tv_urls <-
        dictionary_gkg_tv_daily_summaries()
      assign(x = 'gkg_tv_urls', value = gkg_tv_urls, envir = .GlobalEnv)
    }
    urls <-
      gkg_tv_urls %>%
      dplyr::filter(dateData == date_data) %>%
      .$urlData
  }
  # Individual file failures yield an empty tibble instead of aborting.
  get_data_gkg_tv_safe <-
    possibly(.get_data_gkg_tv, tibble())
  all_data <-
    urls %>%
    map_dfr(function(x) {
      get_data_gkg_tv_safe(url = x,
                           return_message = return_message)
    }) %>%
    distinct() %>%
    suppressMessages() %>%
    suppressWarnings()
  if (return_message) {
    "You retrieved " %>%
      paste0(all_data %>% nrow, " gkg tv events for ", date_data) %>%
      cat(fill = T)
  }
  return(all_data)
}
#' Gets GKG TV data for specified days
#'
#' @param dates specified dates, year, month day format
#' @param only_most_recent returns only the most recent period
#' \code{c(T, F)}
#' @param return_message
#'
#' @return
#' @export
#' @import purrr
#' @examples
gkg_tv_days <-
  function(dates = c("2016-06-01", "2016-02-01"),
           only_most_recent = F,
           return_message = T) {
    # Pulls GKG TV data for each requested day and stacks the results;
    # a failing day contributes an empty tibble instead of aborting.
    day_fetcher <-
      possibly(.get_data_gkg_tv_day, tibble())
    combined <-
      suppressWarnings(
        map_dfr(dates, function(day) {
          day_fetcher(
            date_data = day,
            only_most_recent = only_most_recent,
            return_message = return_message
          )
        })
      )
    combined
  }
#' Convert date columns to numeric
#'
#' @param data
#' @param column_keyword Keyword of the data frame's columns to look for that signifies dates
#' \code{c('date')}
#' @param exclude_col_type The types of column to exclude from converting to numeric
#' \code{c('character')}
#' @param return_message return a message
#' @importFrom tibble rownames_to_column
#' @return
#' @export
#'
#' @examples
date_columns_to_numeric <-
  function(data,
           column_keyword = 'date',
           exclude_col_type = c('character'),
           return_message = T) {
    # Converts every column whose name matches `column_keyword` to
    # numeric, skipping any matched column whose class is listed in
    # `exclude_col_type`.
    # Build a (rowname = column name, col_class) lookup of the matches.
    name_df <-
      data %>%
      dplyr::select(dplyr::matches(column_keyword)) %>%
      map(class) %>%
      unlist() %>%
      data.frame(col_class = .) %>%
      rownames_to_column()
    # BUG FIX: this previously filtered on the undefined name
    # `col_type` instead of the `exclude_col_type` parameter, which
    # errored whenever a matched column had an excluded class.
    exclude_cols <-
      name_df %>%
      dplyr::filter(col_class %in% exclude_col_type) %>%
      .$rowname
    mutate_cols <-
      data %>%
      dplyr::select(dplyr::matches(column_keyword)) %>%
      dplyr::select(-one_of(exclude_cols)) %>% names
    data <-
      data %>%
      mutate_at(mutate_cols, as.numeric)
    if (return_message) {
      # (typo "convereted" fixed)
      "You converted " %>%
        paste0(paste0(mutate_cols, collapse = ', '), ' to numeric') %>%
        cat(fill = T)
    }
    return(data)
  }
# trelliscope -------------------------------------------------------------
check_for_trelliscope_js <-
  function() {
    # Installs trelliscopejs from GitHub when it is not already present.
    # (Renamed the flag: `missing` shadowed base::missing.)
    installed <-
      installed.packages() %>% dplyr::as_tibble() %>%
      dplyr::filter(Package == 'trelliscopejs') %>%
      nrow() > 0
    if (!installed) {
      devtools::install_github("hafen/trelliscopejs")
    }
  }
.filter_domain_data <-
function(data,
domains = c(
'wsj.com',
'law360',
'nytimes.com',
'dailymail',
'recode',
'architizer',
'globest',
'realert',
'seekingalpha.com',
'bloomberg.com',
'curbed.com',
'commercialobserver.com',
'globest.com',
'realdeal.com',
'techcrunch.com',
'nytimes.com',
'architizer',
'pehub',
'rew-online',
'nypost'
),
random_domains = 20,
only_pictures = TRUE) {
# Restricts a VGKG data frame to rows whose domainSource matches the
# requested domains plus (optionally) a random sample of other domains;
# only_pictures additionally drops rows with no image URL.
search_domains <-
c()
if (!random_domains %>% is_null()) {
# NOTE(review): sample() errors when there are fewer unique domains
# than random_domains — confirm inputs are always large enough.
randoms <-
data$domainSource %>% unique() %>% sample(random_domains)
search_domains <-
c(search_domains, randoms) %>% unique()
}
if (!domains %>% is_null()) {
search_domains <-
c(search_domains, domains)
}
# Collapse to one regex alternation over all requested domains.
search_domains <-
search_domains %>%
unique() %>%
paste(collapse = '|')
data <-
data %>%
filter(domainSource %>% str_detect(search_domains))
if (only_pictures) {
data <-
data %>%
filter(!urlImage %>% is.na())
}
return(data)
}
#' Visualize Cloudvision trelliscope
#'
#' @param data
#' @param extra_columns
#' @param trelliscope_parameters \itemize{
#' \item title: Trelliscope title
#' \item rows: Trelliscope rows
#' \item columns: Trelliscope columns
#' \item path: file path to save trelliscope, if NULL no where
#' }
#' @param vgkg_parse \itemize{
#' \item faces,
#' \item labels,
#' \item landmarks,
#' \item languages,
#' \item logos,
#' \item ocr
#' }
#' @param domains
#' @param random_domains
#' @param only_pictures
#'
#' @return
#' @export
#' @import dplyr purrr tidyr trelliscopejs stringr
#' @examples
visualize_vgkg_trelliscope <-
function(data,
vgkg_parse = 'landmarks',
domains = c(
'wsj.com',
'seekingalpha.com',
'bloomberg.com',
'curbed.com',
'commercialobserver.com',
'globest.com',
'realdeal.com',
'techcrunch.com',
'nytimes.com',
'architizer',
'pehub',
'rew-online.com',
'nypost.com'
),
trelliscope_parameters = list(
title = NULL,
path = NULL,
rows = NULL,
columns = NULL
),
random_domains = 0,
only_pictures = TRUE,
extra_columns = NULL) {
# Builds an interactive trelliscopejs image explorer from VGKG data,
# optionally joined with one flavor of parsed Cloud Vision annotations
# (vgkg_parse). Returns the trelliscope widget.
check_for_trelliscope_js()
# Stash user-requested extra columns so they can be re-joined after the
# parse step below replaces `data`.
if (!extra_columns %>% is_null()) {
df_extra <-
data %>%
dplyr::select(idVGKG, one_of(extra_columns))
}
# NOTE(review): only_pictures is accepted but never forwarded to
# .filter_domain_data (which defaults it to TRUE) — confirm intent.
has_domains <-
!domains %>% is_null() |
!random_domains %>% is_null()
if (has_domains) {
data <-
data %>%
.filter_domain_data(domains = domains, random_domains = random_domains)
}
# Parse the requested annotation flavor and record which column(s)
# should appear as panel labels (main_name).
if (!vgkg_parse %>% is_null()) {
vgkg_options <-
c('faces',
'labels',
'landmarks',
'languages',
'logos',
'ocr')
vgkg_parse <-
vgkg_parse %>% str_to_lower()
if (!vgkg_parse %in% vgkg_options) {
stop(list(
"Parse options can only be:\n",
paste(vgkg_options, collapse = '\n')
) %>% reduce(paste0))
}
is_faces <-
vgkg_parse == 'faces'
if (is_faces) {
data <-
data %>%
parse_vgkg_faces(filter_na = TRUE, return_wide = TRUE) %>%
left_join(data %>% select(idVGKG:urlImage, dateDocument)) %>%
suppressMessages() %>%
suppressWarnings()
main_name <-
c('scoreDetectionConfidence',
'scoreEmotionAngerLikelihood')
}
is_labels <-
vgkg_parse == 'labels'
if (is_labels) {
data <-
data %>%
parse_vgkg_labels(filter_na = TRUE, return_wide = FALSE) %>%
left_join(data %>% select(idVGKG:urlImage, dateDocument)) %>%
suppressMessages() %>%
suppressWarnings()
main_name <-
c('nameLabel', 'scoreConfidenceLabel')
}
is_landmark <-
vgkg_parse == 'landmarks'
if (is_landmark) {
data <-
data %>%
parse_vgkg_landmarks(filter_na = TRUE, return_wide = FALSE) %>%
left_join(data %>% select(idVGKG:urlImage, dateDocument)) %>%
suppressMessages() %>%
suppressWarnings()
main_name <-
c('nameLandmark', 'scoreConfidenceLandmark')
}
is_language <-
vgkg_parse == 'languages'
if (is_language) {
data <-
data %>%
parse_vgkg_languages(filter_na = TRUE, return_wide = FALSE) %>%
left_join(data %>% select(idVGKG:urlImage, dateDocument)) %>%
suppressMessages() %>%
suppressWarnings()
main_name <-
c('idLanguage')
}
is_ocr <-
vgkg_parse == 'ocr'
if (is_ocr) {
data <-
data %>%
parse_vgkg_ocr(filter_na = TRUE, return_wide = FALSE) %>%
left_join(data %>% select(idVGKG:urlImage, dateDocument)) %>%
suppressMessages() %>%
suppressWarnings()
main_name <-
c('textOCR')
}
is_logos <-
vgkg_parse == 'logos'
if (is_logos) {
data <-
data %>%
parse_vgkg_logos(filter_na = TRUE, return_wide = FALSE) %>%
left_join(data %>% select(idVGKG:urlImage, dateDocument)) %>%
suppressMessages() %>%
suppressWarnings()
main_name <-
c('nameLogo')
}
} else {
main_name <-
''
}
if (!vgkg_parse %>% is_null()) {
parse_title <-
vgkg_parse %>% str_to_title()
} else {
parse_title <- ''
}
# Use the first non-NA document date in the default display title.
dates <-
data$dateTimeDocument %>% as.Date("%Y-%m-%d") %>% unique() %>%
suppressWarnings()
dates <-
dates[!dates %>% is.na()] %>% .[[1]]
title <-
list('GDELT VGKG Explorer for ', parse_title, ' ', dates) %>%
reduce(paste0)
if (!extra_columns %>% is_null()) {
data <-
data %>%
left_join(df_extra) %>%
suppressMessages()
}
# Trelliscope cognostics dislike NA logicals; coerce them to FALSE.
data <-
data %>%
mutate_if(is.logical,
funs(ifelse(. %>% is.na(), FALSE, .)))
# Resolve display parameters, defaulting to a 1-row x 2-column grid.
if (!trelliscope_parameters$title %>% is_null()) {
title <-
trelliscope_parameters$title
}
if (trelliscope_parameters$rows %>% is_null()) {
rows <- 1
} else {
rows <-
trelliscope_parameters$rows
}
if (trelliscope_parameters$columns %>% is_null()) {
columns <- 2
} else {
columns <-
trelliscope_parameters$columns
}
no_path <-
trelliscope_parameters$path %>% is_null()
if (no_path) {
# In-memory widget: drop columns that are >50% NA, add the image
# panel and article-link cognostic, then build the trelliscope.
viz <-
data %>%
dplyr::select(which(colMeans(is.na(.)) < .5)) %>%
mutate(
panel = trelliscopejs::img_panel(urlImage),
urlArticle = trelliscopejs::cog_href(documentSource),
idItem = 1:n()
) %>%
select(idItem, idVGKG, everything()) %>%
arrange(idItem) %>%
trelliscopejs::trelliscope(
name = title,
nrow = rows,
ncol = columns,
state = list(labels = c(
"domainSource", "idVGKG", main_name[[1]], 'urlArticle'
))
)
} else {
# Same pipeline, but written to disk at the requested path.
# NOTE(review): this branch passes all of main_name as labels while
# the in-memory branch passes only main_name[[1]] — confirm whether
# the difference is intentional.
file_path <-
trelliscope_parameters$path
viz <-
data %>%
dplyr::select(which(colMeans(is.na(.)) < .5)) %>%
mutate(
panel = trelliscopejs::img_panel(urlImage),
urlArticle = trelliscopejs::cog_href(documentSource),
idItem = 1:n()
) %>%
select(idItem, idVGKG, everything()) %>%
arrange(idItem) %>%
trelliscopejs::trelliscope(
name = title,
nrow = rows,
ncol = columns,
path = file_path,
state = list(labels = c(
"domainSource", "idVGKG", main_name, 'urlArticle'
))
)
}
return(viz)
}
#' Trelliscope GKG data
#'
#' Builds an interactive trelliscopejs explorer for GDELT Global Knowledge
#' Graph data, faceted by a chosen parse dimension.
#'
#' @param data a GKG data frame
#' @param domains character vector of source domains to keep
#' @param trelliscope_parameters \itemize{
#' \item title: Trelliscope title
#' \item rows: Trelliscope rows
#' \item columns: Trelliscope columns
#' \item path: file path to save trelliscope, if NULL no where
#' }
#' @param extra_columns extra columns from \code{data} to join into the display
#' @param random_domains number of additional random domains to include
#' @param only_pictures if \code{TRUE}, restrict to records with pictures
#' @param gkg_parse \itemize{
#' \item 'tone',
#' \item 'dates',
#' \item 'events',
#' \item 'gcam',
#' \item 'locations',
#' \item 'names',
#' \item 'numerics',
#' \item 'organizations',
#' \item 'people',
#' \item 'quotes',
#' \item 'social',
#' \item 'themes',
#' \item 'xml'
#' }
#'
#' @return a trelliscope htmlwidget
#' @export
#' @import dplyr purrr tidyr trelliscopejs stringr
#' @examples
visualize_gkg_trelliscope <-
  function(data,
           domains = c(
             'wsj.com',
             'law360',
             'nytimes.com',
             'dailymail',
             'recode',
             'architizer',
             'globest',
             'realert',
             'seekingalpha.com',
             'bloomberg.com',
             'curbed.com',
             'commercialobserver.com',
             'globest.com',
             'realdeal.com',
             'techcrunch.com',
             'nytimes.com',
             'architizer',
             'pehub',
             'rew-online',
             'nypost'
           ),
           random_domains = 0,
           only_pictures = TRUE,
           extra_columns = NULL,
           trelliscope_parameters = list(
             title = NULL,
             path = NULL,
             rows = NULL,
             columns = NULL
           ),
           gkg_parse = 'names') {
    # Interactive trelliscopejs explorer for GDELT GKG data: panels show the
    # article image, cognostic labels come from the selected `gkg_parse`
    # dimension.  NOTE(review): `only_pictures` is accepted but not used in
    # this body -- TODO confirm intended behavior.
    check_for_trelliscope_js()
    # Keep only records from requested and/or randomly sampled domains.
    has_domains <-
      !domains %>% is_null() |
      !random_domains %>% is_null()
    if (has_domains) {
      data <-
        data %>%
        .filter_domain_data(domains = domains, random_domains = random_domains)
    }
    if (!gkg_parse %>% is_null()) {
      gkg_options <-
        c(
          'tone',
          'dates',
          'events',
          'gcam',
          'locations',
          'names',
          'numerics',
          'organizations',
          'people',
          'quotes',
          'social',
          'themes',
          'xml'
        )
      gkg_parse <-
        gkg_parse %>% str_to_lower()
      if (!gkg_parse %in% gkg_options) {
        stop(list(
          "Parse options can only be:\n",
          paste(gkg_options, collapse = '\n')
        ) %>% reduce(paste0))
      }
      # Each branch long-parses one GKG dimension, re-attaches the article
      # metadata columns (idGKG:domainSource, urlImage), and records the
      # column name(s) used as trelliscope labels in `main_name`.
      is_tone <-
        gkg_parse == 'tone'
      if (is_tone) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_article_tone(filter_na = TRUE, return_wide = FALSE) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('scoreTone')
      }
      is_xml <-
        gkg_parse == 'xml'
      if (is_xml) {
        plot_data <-
          data %>%
          filter(!xmlExtras %>% is.na()) %>%
          parse_xml_extras(return_wide = FALSE) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('item', 'value')
      }
      # FIX: was `gkg_parse == 'date'`; the validated option is 'dates', so
      # choosing it previously left `plot_data` undefined and crashed below.
      is_date <-
        gkg_parse == 'dates'
      if (is_date) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_dates(filter_na = TRUE, return_wide = FALSE) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('dateResolution')
      }
      is_events <-
        gkg_parse == 'events'
      if (is_events) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_event_counts(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(get_codes_gkg_themes()) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('codeGKGTheme',
            'countEvent',
            'entityEvent',
            'idADM1Code')
      }
      is_gcam <-
        gkg_parse == 'gcam'
      if (is_gcam) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_gcams(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(get_codes_gcam()) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('idGCAM', 'dimensionHumanName')
      }
      is_locations <-
        gkg_parse == 'locations'
      if (is_locations) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_locations(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(get_codes_stability_locations()) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('idADM1Code', 'location')
      }
      is_names <-
        gkg_parse == 'names'
      if (is_names) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_names(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('nameMentionedName')
      }
      is_numerics <-
        gkg_parse == 'numerics'
      if (is_numerics) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_numerics(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('amountTerm', 'amountValue')
      }
      is_organizations <-
        gkg_parse == 'organizations'
      if (is_organizations) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_organizations(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('nameOrganization')
      }
      is_people <-
        gkg_parse == 'people'
      if (is_people) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_people(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('namePerson')
      }
      is_quotes <-
        gkg_parse == 'quotes'
      if (is_quotes) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_quotes(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('lengthQuote', 'textQuote')
      }
      is_social <-
        gkg_parse == 'social'
      if (is_social) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_social_embeds() %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('domainSocialMediaImageEmbed')
      }
      is_themes <-
        gkg_parse == 'themes'
      if (is_themes) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_themes(return_wide = FALSE) %>%
          left_join(get_codes_gkg_themes() %>% dplyr::rename(codeTheme = codeGKGTheme)) %>%
          left_join(data %>% select(idGKG:domainSource, urlImage)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('codeTheme')
      }
    } else {
      main_name <-
        ''
    }
    # Title: "GDELT GKG Explorer for <Parse> <first document date>".
    if (!gkg_parse %>% is_null()) {
      parse_title <-
        gkg_parse %>% str_to_title()
    } else {
      parse_title <- ''
    }
    dates <-
      data$dateTimeDocument %>% as.Date("%Y-%m-%d") %>% unique()
    dates <-
      dates[!dates %>% is.na()] %>% .[[1]]
    title <-
      list('GDELT GKG Explorer for ', parse_title, ' ', dates) %>%
      reduce(paste0) %>% str_trim()
    # Optionally carry extra columns from the raw data into the display.
    if (!extra_columns %>% is_null()) {
      plot_data <-
        plot_data %>%
        left_join(
          data %>% dplyr::select(one_of(c('idGKG', extra_columns)))
        ) %>%
        suppressMessages()
    }
    # Missing logicals become FALSE so cognostic filters behave predictably.
    plot_data <-
      plot_data %>%
      mutate_if(is.logical,
                funs(ifelse(. %>% is.na(), FALSE, .)))
    # User-supplied trelliscope parameters override the defaults.
    if (!trelliscope_parameters$title %>% is_null()) {
      title <-
        trelliscope_parameters$title
    }
    if (trelliscope_parameters$rows %>% is_null()) {
      rows <- 1
    } else {
      rows <-
        trelliscope_parameters$rows
    }
    if (trelliscope_parameters$columns %>% is_null()) {
      columns <- 2
    } else {
      columns <-
        trelliscope_parameters$columns
    }
    no_path <-
      trelliscope_parameters$path %>% is_null()
    if (no_path) {
      viz <-
        plot_data %>%
        mutate(
          panel = trelliscopejs::img_panel(urlImage),
          urlArticle = trelliscopejs::cog_href(documentSource),
          idItem = 1:n()
        ) %>%
        select(idItem, idGKG, everything()) %>%
        arrange(idItem) %>%
        trelliscopejs::trelliscope(
          name = title,
          nrow = rows,
          ncol = columns,
          state = list(labels = c(
            "domainSource", "idGKG", main_name, 'urlArticle'
          ))
        )
    } else {
      file_path <-
        trelliscope_parameters$path
      viz <-
        plot_data %>%
        mutate(
          panel = trelliscopejs::img_panel(urlImage),
          urlArticle = trelliscopejs::cog_href(documentSource),
          idItem = 1:n()
        ) %>%
        select(idItem, idGKG, everything()) %>%
        arrange(idItem) %>%
        trelliscopejs::trelliscope(
          name = title,
          nrow = rows,
          ncol = columns,
          path = file_path,
          state = list(labels = c(
            "domainSource", "idGKG", main_name, 'urlArticle'
          ))
        )
    }
    return(viz)
  }
#' Trelliscope GKG television data
#'
#' Builds an interactive trelliscopejs explorer for GDELT GKG television
#' data, faceted by a chosen parse dimension.
#'
#' @param data a GKG television data frame
#' @param stations character vector of TV station names to keep
#' @param random_stations number of additional random stations to include
#' @param only_pictures if \code{TRUE}, restrict to records with pictures
#' @param gkg_parse \itemize{
#' \item 'tone',
#' \item 'dates',
#' \item 'events',
#' \item 'gcam',
#' \item 'locations',
#' \item 'names',
#' \item 'numerics',
#' \item 'organizations',
#' \item 'people',
#' \item 'quotes',
#' \item 'social',
#' \item 'themes',
#' \item 'xml'
#' }
#'
#' @return a trelliscope htmlwidget
#' @export
#' @import dplyr purrr tidyr trelliscopejs stringr
#' @examples
visualize_gkg_tv_trelliscope <-
  function(data,
           stations = c(
             "CNN",
             "Bloomberg",
             "FBC",
             "BBCNews",
             "WTTG"
           ),
           random_stations = 5,
           only_pictures = TRUE,
           gkg_parse = 'people') {
    # Interactive trelliscopejs explorer for GDELT GKG television data.
    # TV records carry no images, so a placeholder picture is used for every
    # panel.  NOTE(review): `only_pictures` is accepted but not used in this
    # body -- TODO confirm intended behavior.
    check_for_trelliscope_js()
    # Build the station filter from explicit stations plus a random sample.
    station_search <-
      c()
    has_random_stations <-
      !random_stations %>% is_null()
    if (has_random_stations) {
      available_stations <-
        data$nameSource %>%
        unique()
      # FIX: guard with min() -- sample() errors when asked for more
      # stations than exist in the data.
      random_tv <-
        available_stations %>%
        sample(min(random_stations, length(available_stations)))
      station_search <-
        c(station_search, random_tv)
    }
    if (!stations %>% is_null()) {
      station_search <-
        c(station_search, stations) %>%
        str_to_upper()
    }
    # Collapse into a single case-insensitive alternation regex.
    station_search <-
      station_search %>% str_to_upper() %>%
      unique() %>% paste0(collapse = '|')
    data <-
      data %>%
      filter(nameSource %>% str_detect(station_search))
    if (!gkg_parse %>% is_null()) {
      gkg_options <-
        c(
          'tone',
          'dates',
          'events',
          'gcam',
          'locations',
          'names',
          'numerics',
          'organizations',
          'people',
          'quotes',
          'social',
          'themes',
          'xml'
        )
      gkg_parse <-
        gkg_parse %>% str_to_lower()
      if (!gkg_parse %in% gkg_options) {
        stop(list(
          "Parse options can only be:\n",
          paste(gkg_options, collapse = '\n')
        ) %>% reduce(paste0))
      }
      # Each branch long-parses one GKG dimension, re-attaches the TV
      # metadata columns (idGKG:nameTVShow), and records the column name(s)
      # used as trelliscope labels in `main_name`.
      is_tone <-
        gkg_parse == 'tone'
      if (is_tone) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_article_tone(filter_na = TRUE, return_wide = FALSE) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('scoreTone')
      }
      is_xml <-
        gkg_parse == 'xml'
      if (is_xml) {
        plot_data <-
          data %>%
          filter(!xmlExtras %>% is.na()) %>%
          parse_xml_extras(return_wide = FALSE) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('item', 'value')
      }
      # FIX: was `gkg_parse == 'date'`; the validated option is 'dates', so
      # choosing it previously left `plot_data` undefined and crashed below.
      is_date <-
        gkg_parse == 'dates'
      if (is_date) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_dates(filter_na = TRUE, return_wide = FALSE) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('dateResolution')
      }
      is_events <-
        gkg_parse == 'events'
      if (is_events) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_event_counts(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(get_codes_gkg_themes()) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('codeGKGTheme',
            'countEvent',
            'entityEvent',
            'idADM1Code')
      }
      is_gcam <-
        gkg_parse == 'gcam'
      if (is_gcam) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_gcams(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(get_codes_gcam()) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('idGCAM', 'dimensionHumanName')
      }
      is_locations <-
        gkg_parse == 'locations'
      if (is_locations) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_locations(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(get_codes_stability_locations()) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('idADM1Code', 'location')
      }
      is_names <-
        gkg_parse == 'names'
      if (is_names) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_names(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('nameMentionedName')
      }
      is_numerics <-
        gkg_parse == 'numerics'
      if (is_numerics) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_numerics(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('amountTerm', 'amountValue')
      }
      is_organizations <-
        gkg_parse == 'organizations'
      if (is_organizations) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_organizations(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('nameOrganization')
      }
      is_people <-
        gkg_parse == 'people'
      if (is_people) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_people(people_column = 'personsCharLoc', return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('namePerson')
      }
      is_quotes <-
        gkg_parse == 'quotes'
      if (is_quotes) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_quotes(return_wide = FALSE, filter_na = TRUE) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('lengthQuote', 'textQuote')
      }
      is_social <-
        gkg_parse == 'social'
      if (is_social) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_social_embeds() %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('domainSocialMediaImageEmbed')
      }
      is_themes <-
        gkg_parse == 'themes'
      if (is_themes) {
        plot_data <-
          data %>%
          parse_gkg_mentioned_themes(return_wide = FALSE) %>%
          left_join(get_codes_gkg_themes() %>% dplyr::rename(codeTheme = codeGKGTheme)) %>%
          left_join(data %>% select(idGKG:nameTVShow)) %>%
          suppressMessages() %>%
          suppressWarnings() %>%
          select(-c(isDocumentURL, idSourceCollectionIdentifier))
        main_name <-
          c('codeTheme')
      }
    } else {
      main_name <-
        ''
    }
    # Title: "GDELT GKG TV Explorer for <Parse> <first document date>".
    if (!gkg_parse %>% is_null()) {
      parse_title <-
        gkg_parse %>% str_to_title()
    } else {
      parse_title <- ''
    }
    dates <-
      data$dateTimeDocument %>% as.Date("%Y-%m-%d") %>% unique()
    dates <-
      dates[!dates %>% is.na()] %>% .[[1]]
    title <-
      list('GDELT GKG TV Explorer for ', parse_title, ' ', dates) %>%
      reduce(paste0) %>% str_trim()
    # TV records have no images, so every panel gets a placeholder picture;
    # the archive-video URL becomes a clickable cognostic.
    viz <-
      plot_data %>%
      mutate(urlImage = "http://www.novelupdates.com/img/noimagefound.jpg") %>%
      mutate(
        panel = trelliscopejs::img_panel(urlImage),
        urlArchiveVideo = trelliscopejs::cog_href(urlArchiveVideo),
        idItem = 1:n()
      ) %>%
      select(idItem, idGKG, everything()) %>%
      arrange(idItem) %>%
      trelliscopejs::trelliscope(
        name = title,
        nrow = 1,
        ncol = 2,
        state = list(
          labels = c(
            'dateTimeDocument',
            'urlArchiveVideo',
            "idTVNetwork",
            "nameTVShow",
            main_name,
            'idTVNetwork'
          )
        )
      )
    return(viz)
  }
# NOTE: the two lines below are boilerplate scraped from a documentation
# website; they are not valid R, so they are preserved here as comments.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.