R/convert_bookingData_fromNonCumulative_toCumulative.2.R

Defines functions convert_bookingData_fromNonCumulative_toCumulative.2

Documented in convert_bookingData_fromNonCumulative_toCumulative.2

#' Process booking data on a flight level
#'
#' This function processes booking data at flight level, where a flight is a
#' unique combination of flight id and departure date. If some
#' weeks-to-departure are missing for a flight, they are introduced as whole
#' rows (with zero bookings and zero revenue), and cumulative bookings are then
#' calculated per flight, accumulating from the largest weeks-to-departure
#' value down to the smallest.
#'
#' @param df A data frame of booking data with exactly the following columns in
#'   the following left-to-right order: flight id, departure date,
#'   weeks-2-departure, non-cumulative bookings on the given departure, revenue
#'   per person.
#' @param choose_minAnDmax_w2d_forMe A boolean indicating whether to let the
#'   algorithm choose the range of weeks-2-departure from the data.
#' @param min_W2D A numeric of minimum weeks-to-departure. Defaults to 0.
#'   Relevant only if choose_minAnDmax_w2d_forMe is set to FALSE.
#' @param max_w2D A numeric of maximum weeks-to-departure. Defaults to 60.
#'   Relevant only if choose_minAnDmax_w2d_forMe is set to FALSE.
#' @return A data frame with the columns of \code{df} plus a trailing
#'   \code{cum_NET_PAX} column. Rows are grouped by flight id (in order of first
#'   appearance), then by departure date (in order of first appearance), and
#'   sorted by decreasing weeks-2-departure within each group. Row names are
#'   "<flight id>-<departure date>-<weeks-2-departure>".
#' @export
convert_bookingData_fromNonCumulative_toCumulative.2 <- function(df,
                                                                 choose_minAnDmax_w2d_forMe = TRUE,
                                                                 min_W2D = 0,
                                                                 max_w2D = 60) {
  df <- as.data.frame(df)
  if (ncol(df) != 5L) {
    stop(
      "`df` must have exactly 5 columns: flight id, departure date, ",
      "weeks-2-departure, non-cumulative bookings, revenue per person.",
      call. = FALSE
    )
  }

  # Weeks-2-departure and bookings are always returned as numeric.
  df[[3]] <- as.numeric(df[[3]])
  df[[4]] <- as.numeric(df[[4]])

  # Nothing to process: return the (empty) input with the extra column.
  if (nrow(df) == 0L) {
    df[["cum_NET_PAX"]] <- numeric(0)
    return(df)
  }

  if (choose_minAnDmax_w2d_forMe) {
    observed_w2d <- df[[3]][!is.na(df[[3]])]
    if (length(observed_w2d) == 0L) {
      stop(
        "Cannot infer the weeks-2-departure range: column 3 of `df` has no ",
        "non-missing values.",
        call. = FALSE
      )
    }
    W2D_range <- min(observed_w2d):max(observed_w2d)
  } else {
    W2D_range <- min_W2D:max_w2D
  }

  flight_ids <- unique(df[[1]])
  dep_dates <- unique(df[[2]])

  # Index rows by flight once, instead of re-scanning `df` for every
  # flight x date combination. `match()` also puts NA keys in their own group
  # rather than letting `==` inject all-NA rows into other groups.
  rows_by_flight <- split(seq_len(nrow(df)), match(df[[1]], flight_ids))

  pieces <- list()
  for (i in seq_along(flight_ids)) {
    # Progress output, kept on stdout as before.
    print(flight_ids[i])

    flight_rows <- rows_by_flight[[i]]
    # Only departure dates that actually occur for this flight are visited;
    # split() orders groups by the date's first appearance in `df`.
    rows_by_date <- split(flight_rows, match(df[[2]][flight_rows], dep_dates))

    for (rows in rows_by_date) {
      d <- df[rows, , drop = FALSE]
      missing <- W2D_range[!W2D_range %in% d[[3]]]

      if (length(missing) > 0L) {
        # Build padding rows by replicating a real row so that every column
        # keeps its type (character ids, Dates, factors, ...), then overwrite
        # the measures with zeros.
        pad <- d[rep(1L, length(missing)), , drop = FALSE]
        rownames(pad) <- NULL
        pad[[3]] <- as.numeric(missing)
        pad[[4]] <- 0
        pad[[5]] <- 0
        d <- rbind(d, pad)
      }

      # Accumulate from the earliest booking week (largest weeks-2-departure)
      # towards departure.
      d <- d[order(d[[3]], decreasing = TRUE), , drop = FALSE]
      d[["cum_NET_PAX"]] <- cumsum(d[[4]])
      pieces[[length(pieces) + 1L]] <- d
    }
  }

  # Single bind at the end instead of growing the result inside the loop.
  o <- do.call(rbind, pieces)
  row.names(o) <- paste(o[[1]], o[[2]], o[[3]], sep = "-")
  o
}
msxakk89/dat documentation built on Aug. 3, 2020, 6:39 p.m.