# R/create_union_recalculated.R
#
# Defines functions: sum.without.na, get.oth.info, create.union.recalculated
#
# Documented in: create.union.recalculated

#' Create the channel union with unpaid metrics net of paid metrics
#'
#' Builds the union via `create.union(channel, export)`. When the union has
#' rows, contains a `<channel>.Budget` column, and `config` has an entry named
#' `<tolower(channel)>.recalculate.columns`, the columns listed in that entry
#' (and present in the union) are coerced to numeric. Then, for every
#' `Content.ID` that occurs both in a source matching "Unpaid_Info" and in a
#' source matching "Paid_Info" or "Ads_Manager_Info", the per-`Content.ID` sum
#' of the paid values (missing values ignored) is subtracted from each unpaid
#' row (missing unpaid values count as 0).
#'
#' @param channel Channel name. Used as the prefix of the budget column and,
#'   lower-cased, of the config entry name.
#' @param export Passed unchanged to `create.union()`.
#' @param config Named list; the entry
#'   `<tolower(channel)>.recalculate.columns` holds the names of the columns
#'   to recalculate.
#' @return The result of `create.union()`. If the recalculation ran, the
#'   configured columns are numeric, the rows are ordered by `Content.ID`, and
#'   the unpaid rows carry the net values; otherwise the union is returned as
#'   produced by `create.union()`.
#' @export
create.union.recalculated <- function(channel, export, config){
  output <- create.union(channel, export)
  config.columns <- paste(tolower(channel), "recalculate", "columns", sep = ".")
  budget.column <- paste(channel, "Budget", sep = ".")

  # Scalar guards: nothing to recalculate for an empty union, a union without
  # a budget column, or a channel without configured columns.
  if (!isTRUE(nrow(output) > 0) ||
      !(budget.column %in% names(output)) ||
      !(config.columns %in% names(config))) {
    return(output)
  }

  # Only columns that actually exist in the union can be recalculated.
  columns <- intersect(config[[config.columns]], names(output))
  if (length(columns) == 0) {
    return(output)
  }

  for (column in columns) {
    # Strip the ",00" decimal suffix before the numeric conversion.
    # NOTE(review): the pattern is not anchored, so ",00" is removed wherever
    # it occurs in a value - confirm that is intended.
    output[[column]] <- as.numeric(gsub(",00", "", output[[column]]))
  }

  # Keep the Content.ID ordering of the result that callers received so far.
  output <- output[order(output$Content.ID), ]

  is.unpaid <- grepl("Unpaid_Info", output$Source)
  is.paid <- grepl("Paid_Info", output$Source) |
    grepl("Ads_Manager_Info", output$Source)

  # Content ids reported by both an unpaid and a paid source.
  common.ids <- intersect(output$Content.ID[is.unpaid],
                          output$Content.ID[is.paid])
  unpaid.rows <- is.unpaid & output$Content.ID %in% common.ids
  paid.rows <- is.paid & output$Content.ID %in% common.ids
  if (!any(unpaid.rows)) {
    return(output)
  }

  # Group index of every paid / unpaid row within common.ids. Matching by id
  # (instead of by row position) keeps the subtraction correct when a content
  # id has several unpaid rows.
  paid.group <- match(output$Content.ID[paid.rows], common.ids)
  unpaid.group <- match(output$Content.ID[unpaid.rows], common.ids)

  # Paid totals per content id, ignoring missing values.
  paid.totals <- rowsum(as.matrix(output[paid.rows, columns, drop = FALSE]),
                        group = paid.group, na.rm = TRUE)

  unpaid.values <- as.matrix(output[unpaid.rows, columns, drop = FALSE])
  unpaid.values[is.na(unpaid.values)] <- 0

  output[unpaid.rows, columns] <-
    unpaid.values - paid.totals[as.character(unpaid.group), , drop = FALSE]

  output
}

# Build a single "OTH" remainder row for one group of rows.
#
# x:       data frame with a `Country` column plus the columns named in
#          `columns`.
# columns: names of the metric columns to recalculate.
#
# Returns NULL (invisibly) unless `x` contains both "OTH" and non-"OTH" rows.
# Otherwise returns a one-row data frame based on the first "OTH" row:
# every column except the descriptive ones listed in `keep` is blanked to NA,
# `Country` is "OTH", `Source` is "R Script Calculation", and each column in
# `columns` holds the first "OTH" value minus the sum of the non-"OTH" values
# (missing values ignored in the sum), all after coercion to numeric.
get.oth.info <- function(x, columns)
{
  country <- as.character(x[["Country"]])
  # Rows with a missing country belong to neither side.
  is.oth <- !is.na(country) & country == "OTH"
  is.other <- !is.na(country) & country != "OTH"

  if (!any(is.oth) || !any(is.other)) {
    return(invisible(NULL))
  }

  keep <- c("Source", "Campaign.Name", "Channel", "Content.ID", "Content.Type",
            "Permalink", "Message.Text", "Date")

  output <- x[which(is.oth)[1], , drop = FALSE]
  output[, names(output)[!(names(output) %in% keep)]] <- NA
  output[, "Country"] <- "OTH"
  output[, "Source"] <- "R Script Calculation"

  for (column in columns) {
    values <- as.numeric(x[[column]])
    # Use the same (first) "OTH" row the descriptive fields were taken from,
    # so the result stays one row even if several "OTH" rows are present.
    output[[column]] <- values[is.oth][1] - sum(values[is.other], na.rm = TRUE)
  }

  output
}

# Total of `x` after coercion to numeric, with missing values left out.
sum.without.na <- function(x) {
  sum(as.numeric(x), na.rm = TRUE)
}
# omelyanchikd/merger documentation built on July 13, 2017, 6:24 p.m.