# weighted_logistic_regression/1_lp_and_masking_vectors/1_get_variable_and_mask_a.R

###############################################################
# Identify which x variables are present in study a and mask them
###############################################################
# Input files: the study data, the list of x variables in the model, and the
# table of beta values.
a_in <- "/home/rw13742/Documents/datashield/testing/vertical_comms/data/test.data/simulated_self_harm_data/starting_files/a.csv"
x_variables_in <- "/home/rw13742/Documents/datashield/testing/vertical_comms/data/test.data/simulated_self_harm_data/starting_files/x_variables.csv"
beta_table_in <- "/home/rw13742/Documents/datashield/testing/vertical_comms/data/test.data/simulated_self_harm_data/beta.csv"

# Output files: the masked beta * x term, the number of rows, and the mask.
mv2_betaa_out <- "/home/rw13742/Documents/datashield/testing/vertical_comms/data/test.data/simulated_self_harm_data/mv2_betaa.csv"
n_out <- "/home/rw13742/Documents/datashield/testing/vertical_comms/data/test.data/simulated_self_harm_data/n.csv"
mv2_out <- "/home/rw13742/Documents/datashield/testing/vertical_comms/data/test.data/simulated_self_harm_data/mv2.csv"

####################################
#read in data
####################################

# Each CSV is read with its first row as the header and then converted from a
# data frame to a matrix.
data <- read.csv(a_in, header = TRUE)
data <- as.matrix(data)

x_variables <- read.csv(x_variables_in, header = TRUE)
x_variables <- as.matrix(x_variables)

beta_table <- read.csv(beta_table_in, header = TRUE)
beta_table <- as.matrix(beta_table)

######################################
#identify which cols have any of the
# x variables and subset the data
#######################################
# x variables from the model that are present as columns in this study's data.
study_x_variables <- x_variables[x_variables %in% colnames(data)]
if (length(study_x_variables) == 0) {
  stop("none of the x variables are present in the study data", call. = FALSE)
}

# Column positions of those variables in data. %in% tests set membership;
# `==` would compare element-by-element with recycling and pick the wrong
# columns whenever the two vectors differ in length or order.
xcol <- which(colnames(data) %in% study_x_variables)

# drop = FALSE keeps a as a matrix, with its column names, even when only one
# column is selected, so the names do not need to be re-attached afterwards.
a <- data[, xcol, drop = FALSE]
head(a) # subset of data that contains all x_variables in this study

#########################################
#create masking vector and product must 
#be square dims of ncol(a)
#########################################
mask <- runif(n = nrow(a), min = 1, max = 10) # one uniform draw in [1, 10] per row of a, so the mask lines up with each column of a

###########################################################################
# Select the beta for this study's x variable at the current iteration
###########################################################################

# Position of this study's x variable in the model.
# TODO: will need to use args to insert the order here.
x_variable_number <- 1

# Iteration of the glm that we are on.
# TODO: will need to use args to insert the iteration number here.
iteration_number <- 1

# The beta for x variable k is read from column k + 1 of the beta table
# (presumably column 1 holds the intercept - confirm against beta.csv).
beta_col <- x_variable_number + 1

betas <- as.matrix(beta_table[iteration_number, beta_col])
print(betas)

# TODO: automate this for an expanding number of betas and explanatory
# variables; for now only the first beta and the first column of a are used.

# Masked term, one value per row of a: mv2 + beta1 * x1.
# Computed on whole vectors so the result always has exactly nrow(a) elements
# (the previous loop preallocated ncol(a) slots and indexed the matrix a
# linearly), and so that an input with zero rows yields an empty result.
mv2_betaa <- unname(mask + betas[1] * a[, 1])

write.table(mv2_betaa, row.names = FALSE, sep = ",", file = mv2_betaa_out)
write.table(nrow(a), row.names = FALSE, sep = ",", file = n_out) # to send to analysis computer
write.table(mask, row.names = FALSE, sep = ",", file = mv2_out) # mv2
# Vertical-Datashield/bw.vertical.comms documentation built on May 9, 2019, 9:44 p.m.