sbw: Stable weights that balance covariates

sbw = function(data_frame, t_ind, bal_covs, bal_tols, bal_tols_sd=TRUE, target="treated", l_norm="l_2", w_min=0, normalize=1, solver, display=0, max_iter=100000,  rel_tol=1e-4, abs_tol=1e-4, gap_stop=TRUE, adaptive_rho=TRUE) {

	if (target=="treated") {
		data_frame = data_frame[order(data_frame[, t_ind], decreasing=TRUE), ]
		t_ind = data_frame[, t_ind]
		bal_covs = data_frame[, bal_covs, drop = FALSE]
		if (length(bal_tols)>1) {
			if (bal_tols_sd==TRUE) {
				bal_tols = round(apply(bal_covs, 2, sd)*bal_tols, 9)
			}
		}
		if (length(bal_tols)==1) {
			if (bal_tols_sd==TRUE) {
				bal_tols = round(apply(bal_covs, 2, sd)*bal_tols, 9)
			}
			if (bal_tols_sd==FALSE) {
				bal_tols = rep(bal_tols, length(bal_tols))
			}
		}
		bal_covs = as.matrix(bal_covs)
	}
	if (target=="controls") {
		data_frame = data_frame[order(data_frame[, t_ind], decreasing=FALSE), ]
		t_ind = 1-data_frame[, t_ind]
		bal_covs = data_frame[, bal_covs, drop = FALSE]
		if (length(bal_tols)>1) {
			if (bal_tols_sd==TRUE) {
				bal_tols = round(apply(bal_covs, 2, sd)*bal_tols, 9)
			}
		}
		if (length(bal_tols)==1) {
			if (bal_tols_sd==TRUE) {
				bal_tols = round(apply(bal_covs, 2, sd)*bal_tols, 9)
			}
			if (bal_tols_sd==FALSE) {
				bal_tols = rep(bal_tols, length(bal_tols))
			}
		}
		bal_covs = as.matrix(bal_covs)
	}
	if (target=="all") {
		data_frame = data_frame[order(data_frame[, t_ind], decreasing=TRUE), , drop = FALSE]
		t_ind = data_frame[, t_ind]
		bal_covs = data_frame[, bal_covs, drop = FALSE]
		if (length(bal_tols)>1) {
			if (bal_tols_sd==TRUE) {
				bal_tols = round(apply(bal_covs, 2, sd)*bal_tols, 9)
			}
		}
		if (length(bal_tols)==1) {
			if (bal_tols_sd==TRUE) {
				bal_tols = round(apply(bal_covs, 2, sd)*bal_tols, 9)
			}
			if (bal_tols_sd==FALSE) {
				bal_tols = rep(bal_tols, length(bal_tols))
			}
		}
		bal_covs = as.matrix(rbind(bal_covs, bal_covs[t_ind==0, , drop = FALSE]))
		t_ind = c(rep(1, length(t_ind)), rep(0, sum(1-t_ind)))
	}

	n_t = sum(t_ind)
	n_c = length(t_ind)-n_t

	n_bal_covs = ncol(bal_covs)

	if (l_norm == "l_1") {
		cvec = c(rep(0, n_c), rep(1, n_c))
	}
	if (l_norm == "l_inf") {
		cvec = c(rep(0, n_c), 1)
	}
	if (l_norm == "l_2") {
		cvec = rep(0, n_c)
		Qmat = diag(n_c)-1/n_c*(matrix(1, nrow=n_c, ncol=n_c))
	}

	row_ind_cur = 0
	if (l_norm == "l_1") {
		row_ind_l_norm = sort(rep(1:(n_c*2), n_c+1))+row_ind_cur
		col_ind_l_norm = matrix(rep(1:n_c, n_c*2), nrow = n_c)
		col_ind_l_norm = rbind(col_ind_l_norm, sort(rep((1:n_c)+n_c, 2)))
		col_ind_l_norm = as.vector(col_ind_l_norm)
		vals_l_1 = rep(c(rep(-1/n_c, n_c), -1, rep(1/n_c, n_c), -1), n_c)
		aux_ind = ((sort(rep(1:n_c, 2))-1)*(n_c+1))+sort(rep((0:(n_c-1)), 2))+c(1, 1, rep(0, (n_c*2)-2))
		vals_l_1[aux_ind] = rep(c(1, -1), n_c)+vals_l_1[aux_ind]
		row_ind_cur = max(row_ind_l_norm)
	}
	if (l_norm == "l_inf") {
		row_ind_l_norm = sort(rep(1:(n_c*2), n_c+1))+row_ind_cur
		col_ind_l_norm = matrix(rep(1:n_c, n_c*2), nrow = n_c)
		col_ind_l_norm = rbind(col_ind_l_norm, rep(n_c+1, n_c*2))
		col_ind_l_norm = as.vector(col_ind_l_norm)
		vals_l_1 = rep(c(rep(-1/n_c, n_c), -1, rep(1/n_c, n_c), -1), n_c)
		aux_ind = ((sort(rep(1:n_c, 2))-1)*(n_c+1))+sort(rep((0:(n_c-1)), 2))+c(1, 1, rep(0, (n_c*2)-2))
		vals_l_1[aux_ind] = rep(c(1, -1), n_c)+vals_l_1[aux_ind]
		row_ind_cur = max(row_ind_l_norm)
	}
	bal_covs_c = bal_covs[t_ind==0, , drop = FALSE]
	row_ind_mom = sort(rep(1:(n_bal_covs*2), n_c))+row_ind_cur
	col_ind_mom = rep(1:n_c, n_bal_covs*2)
	vals_mom = cbind(bal_covs_c, -bal_covs_c)[, sort(rep(1:n_bal_covs, 2))+rep(c(0, n_bal_covs), n_bal_covs), drop = FALSE]
	row_ind_cur = max(row_ind_mom)+1
	if (normalize == 1) {
		row_ind_normalize = rep(row_ind_cur, n_c)
		col_ind_normalize = 1:n_c
		vals_normalize = rep(1, n_c)
		row_ind_cur = max(row_ind_normalize)+1
	}

	if (l_norm == "l_1" | l_norm == "l_inf") {
			row_ind = c(row_ind_l_norm, row_ind_mom)
			col_ind = c(col_ind_l_norm, col_ind_mom)
			vals = c(vals_l_1, vals_mom)
			if (normalize == 1) {
				row_ind = c(row_ind_l_norm, row_ind_mom, row_ind_normalize)
				col_ind = c(col_ind_l_norm, col_ind_mom, col_ind_normalize)
				vals = c(vals_l_1, vals_mom, vals_normalize)
			}
	}
	if (l_norm == "l_2") {
		row_ind = c(row_ind_mom)
		col_ind = c(col_ind_mom)
		vals = c(vals_mom)
		if (normalize == 1) {
			row_ind = c(row_ind_mom, row_ind_normalize)
			col_ind = c(col_ind_mom, col_ind_normalize)
			vals = c(vals_mom, vals_normalize)
		}
	}
	aux = cbind(row_ind, col_ind, vals)[order(col_ind), , drop = FALSE]
	Amat = slam::simple_triplet_matrix(i = aux[, 1], j = aux[, 2], v = aux[, 3])

	if (l_norm == "l_1" | l_norm == "l_inf") {
		bvec = rep(0, n_c*2)
		bal_covs_t_mean = apply(bal_covs[t_ind==1, , drop = FALSE], 2, mean)
		bvec_mom = c(bal_covs_t_mean+bal_tols, -bal_covs_t_mean+bal_tols)[sort(rep(1:n_bal_covs, 2))+rep(c(0, n_bal_covs), n_bal_covs)]
		bvec = c(bvec, bvec_mom)
		if (normalize == 1) {
			bvec = c(bvec, 1)
		}
	}
	if (l_norm == "l_2") {
		bal_covs_t_mean = apply(bal_covs[t_ind==1, , drop = FALSE], 2, mean)
		bvec_mom = c(bal_covs_t_mean+bal_tols, -bal_covs_t_mean+bal_tols)[sort(rep(1:n_bal_covs, 2))+rep(c(0, n_bal_covs), n_bal_covs)]
		bvec = bvec_mom
		if (normalize == 1) {
			bvec = c(bvec, 1)
		}
	}

	if (l_norm == "l_1") {
		lb = rep(w_min, n_c)
		lb = c(lb, rep(0, n_c))
	}
	if (l_norm == "l_inf") {
		lb = rep(w_min, n_c)
		lb = c(lb, 0)
	}
	if (l_norm == "l_2") {
		lb = rep(w_min, n_c)
	}

	if (l_norm == "l_1") {
		ub = rep(Inf, n_c*2)
	}
	if (l_norm == "l_inf") {
		ub = rep(Inf, 1+n_c)
	}
	if (l_norm == "l_2") {
		ub = rep(Inf, n_c)
	}

	if (l_norm == "l_1") {
		sense = rep("L", n_c*2)
		sense = c(sense, rep("L", n_bal_covs*2))
		if (normalize == 1) {
			sense = c(sense, "E")
		}
	}
	if (l_norm == "l_inf") {
		sense = rep("L", n_c*2)
		sense = c(sense, rep("L", n_bal_covs*2))
		if (normalize == 1) {
			sense = c(sense, "E")
		}
	}
	if (l_norm == "l_2") {
		sense = rep("L", n_bal_covs*2)
		if (normalize == 1) {
			sense = c(rep("L", n_bal_covs*2), "E")
		}
	}

	if (l_norm == "l_1") {
		var_type = rep("C", n_c*2)
	}
	if (l_norm == "l_inf") {
		var_type = rep("C", 1+n_c)
	}
	if (l_norm == "l_2") {
		var_type = rep("C", n_c)
	}

	if (solver == "cplex") {

		cat(format("  CPLEX optimizer is open..."), "\n")
		cat(format("  Finding the optimal weights..."), "\n")
		ptm = proc.time()
		if (l_norm == "l_1" | l_norm == "l_inf") {
			out = Rcplex(cvec, Amat, bvec, lb = lb, ub = ub, sense = sense, vtype = var_type, n = 1, control = list(trace = display))
		}
		if (l_norm == "l_2") {
			out = Rcplex(cvec, Amat, bvec, 2*Qmat, lb = lb, ub = ub, sense = sense, vtype = var_type, n = 1, control = list(trace = display))
		}
		time = (proc.time()-ptm)[3]

		weights = (out$xopt)[1:n_c]
		cat(format("  Optimal weights found."), "\n")

		data_frame_weights = data_frame
		if (target=="treated") {
			data_frame_weights$weights = c(rep(0, n_t), weights)
		}
		if (target=="controls") {
			data_frame_weights$weights = c(rep(0, n_t), weights)
			data_frame_weights = data_frame_weights[order(data_frame_weights$t_ind, decreasing=TRUE), ]
		}
		if (target=="all") {
			data_frame_weights$weights = c(rep(0, nrow(data_frame_weights)-length(weights)), weights)
		}

		obj_total = out$obj

		status = out$status

		dual_vars = out$extra$lambda[-length(out$extra$lambda)]

		return(list(obj_total = obj_total, time = time, status = status, data_frame_weights = data_frame_weights, dual_vars = dual_vars))
	}
	if (solver == "gurobi") {
		model = list()
		model$modelsense = 'min'
		model$obj = cvec
		model$A = Amat
		model$sense = rep(NA, length(sense))
		model$sense[sense=="E"] = '='
		model$sense[sense=="L"] = '<='
		model$sense[sense=="G"] = '>='
		model$rhs = bvec
		model$vtype = var_type
		model$ub = ub
		params = list(OutputFlag = display)

		cat(format("  Gurobi optimizer is open..."), "\n")
		cat(format("  Finding the optimal weights..."), "\n")

		ptm = proc.time()
		if (l_norm == "l_1" | l_norm == "l_inf") {
			out = gurobi(model, params)
		}
		if (l_norm == "l_2") {
			model$Q = Qmat
			out = gurobi(model, params)
		}
		time = (proc.time()-ptm)[3]

		weights = (out$x)[1:n_c]
		cat(format("  Optimal weights found."), "\n")

		data_frame_weights = data_frame
		if (target=="treated") {
			data_frame_weights$weights = c(rep(0, n_t), weights)
		}
		if (target=="controls") {
			data_frame_weights$weights = c(rep(0, n_t), weights)
			data_frame_weights = data_frame_weights[order(data_frame_weights$t_ind, decreasing=TRUE), ]
		}
		if (target=="all") {
			data_frame_weights$weights = c(rep(0, nrow(data_frame_weights)-length(weights)), weights)
		}

		obj_total = out$obj

		status = out$status

		dual_vars = out$pi[-length(out$pi)]

		return(list(obj_total = obj_total, time = time, status = status, data_frame_weights = data_frame_weights, dual_vars = dual_vars))
	}
	if (solver == "quadprog") {

		cat(format("  quadprog optimizer is open..."), "\n")
		cat(format("  Finding the optimal weights..."), "\n")
		ptm = proc.time()
		if (l_norm == "l_1" | l_norm == "l_inf") {
			stop("Minimizing the l_1 or l_inf norm is not a valid option with quadprog; use either cplex or gurobi")
		}
		if (l_norm == "l_2") {
			Dmat = matrix(0,nrow(Qmat),nrow(Qmat))
			diag(Dmat) = 2
			dvec = rep(0, length(cvec))
			Amat = as.matrix(Amat)
			## Intervert the first line with the last line of
			## Amat and bvec
			###bvec
			tmp=bvec[1]
			bvec[1]=bvec[length(bvec)]
			bvec[length(bvec)]=tmp
			###Amat
			tmp = Amat[1,]
			Amat[1,] = Amat[nrow(Amat),]
			Amat[nrow(Amat),] = tmp
			##Adding the non-negativity constraints
			###Amat
			Amat2=matrix(0,nrow(Amat)+ncol(Amat),ncol(Amat))
			Amat2[1:nrow(Amat),1:ncol(Amat)]=Amat
			Amat2[(1+nrow(Amat)):(nrow(Amat)+ncol(Amat)),1:ncol(Amat)]=-1*diag(ncol(Amat))
			###bvec
			bvec2=rep(0,(length(bvec)+nrow(Qmat)))
			bvec2[1:length(bvec)]=bvec
			##Adapting the matrices to the quadprog
			Amat4 = -t(Amat2)
			bvec4 = -bvec2
			##Solving the QP
			out = solve.QP(Dmat, dvec, Amat4, bvec4, meq=1)
		}
		# cbind(t(Amat4)%*%out$solution, bvec4, t(Amat4)%*%out$solution>=bvec4)
		# table(out$solution>=0)
		time = (proc.time()-ptm)[3]

		weights = (out$solution)[1:n_c]
		weights[weights <= 0] = 0
		cat(format("  Optimal weights found."), "\n")

		data_frame_weights = data_frame
		if (target=="treated") {
			data_frame_weights$weights = c(rep(1, n_t), weights)
		}
		if (target=="controls") {
			data_frame_weights$weights = c(rep(1, n_c), weights)
			data_frame_weights = data_frame_weights[order(data_frame_weights$t_ind, decreasing=TRUE), , drop = FALSE]
		}
		if (target=="all") {
			data_frame_weights$weights = c(rep(0, nrow(data_frame_weights)-length(weights)), weights)
		}

		obj_total = out$value-1/n_c

		status = NA

		dual_vars = out$Lagrangian[-length(out$Lagrangian)]

		return(list(obj_total = obj_total, time = time, status = status, data_frame_weights = data_frame_weights, dual_vars = dual_vars))
	}
	if (solver == "pogs") {

	  cat(format("  POGS optimizer is open..."), "\n")
	  cat(format("  Finding the optimal weights..."), "\n")
	  ptm = proc.time()
	  if (l_norm == "l_1" | l_norm == "l_inf") {
	    stop("Minimizing the l_1 or l_inf norm is not a valid option with pogs; use either cplex or gurobi")
	  }
	  if (l_norm == "l_2") {
		if(normalize == 1){
			nbL = nrow(Amat)-1
			f = list(h = c(kIndLe0(nbL), kIndEq0(1)), b = bvec, a = 1, c = 1, d = 0, e = 0)
			g = list(h = kIndGe0(), c=1,a=1,d=0,e=1)
			Amat = as.matrix(Amat)
			out = pogs(Amat, f, g,params=list(rel_tol = abs_tol, abs_tol = abs_tol, max_iter = max_iter, adaptive_rho = adaptive_rho, verbose = display, gap_stop = gap_stop))
			}
		else{
			#stop("You can not")
			Amat = as.matrix(Amat)
			nbL = nrow(Amat)
			nbC = ncol(Amat)
			print("Amat")
			#print(Amat)
			Amat2 = matrix(0,(nbL+1),(nbC+1))
			Amat2[1:nbL,1:nbC] = Amat
			Amat2[(nbL+1),] = c(rep(1,nbC),-1)
			#print("Amat2")
			#print(Amat2)
			f = list(h = c(kIndLe0(nbL), kIndEq0(1)), b = c(bvec,0), a = 1, c = 1, d = 0, e = 0)
			g = list(h = kIndGe0(), c = 1, a = 1, d = 0, e = c(rep(2,nbC),-2/n_c))
			Amat2 = as.matrix(Amat2)
			out = pogs(Amat2, f, g,params=list(rel_tol = abs_tol, abs_tol = abs_tol, max_iter = max_iter, adaptive_rho = adaptive_rho, verbose = display, gap_stop = gap_stop))
			}
	    }
	  time = (proc.time()-ptm)[3]

	  weights = (out$x)[1:n_c]
	  cat(format("  Optimal weights found."), "\n")

	  data_frame_weights = data_frame
	  if (target=="treated") {
	    data_frame_weights$weights = c(rep(0, n_t), weights)
	  }
	  if (target=="controls") {
	    data_frame_weights$weights = c(rep(0, n_t), weights)
	    data_frame_weights = data_frame_weights[order(data_frame_weights$t_ind, decreasing=TRUE), ]
	  }
	  if (target=="all") {
	    data_frame_weights$weights = c(rep(0, nrow(data_frame_weights)-length(weights)), weights)
	  }

	  obj_total = 2*out$optval-1/n_c

	  status = out$status

	  dual_vars = out$u

	  return(list(obj_total = obj_total, time = time, status = status, data_frame_weights = data_frame_weights, dual_vars = dual_vars))
	}
}