#' @title Merge the 5 files and gives one in-line CSV and one binded file.
#'
#' @description takes the address of files from the user using prompt and pass these address to another function which cleans and binds the file.The computation time can be high depending upon the size of the file. Be patient.
#' @param d address of the directory
#' @param f1 name/address of the biopsy csv
#' @param f2 name/address of the treatment file
#' @param f3 name/address of the demographic file
#' @param f4 name/aadress of the encounter file
#' @param f5 name/address of the complication file
#' @param address function which prmopts the user to add the address of the files.
#' @param myfunction function which takes these address as arguement and generates the files as required.
#' @return NULL
#' @encoding UTF-8
#This Function takes the file path of the files as an arguement and returns the merged in-line file and a binded CSV
myfunction<-function(d,f1,f2,f3,f4,f5){
#install.packages("dplyr")
#library(dplyr)
#install.packages("gtools")
#library(gtools)
#user defined directory
setwd(dir=d)
set.seed(101)
#read biopsy file
data_1 <- read.csv(f1,header = FALSE)
names(data_1) <- as.matrix(data_1[1, ])
data_1 <- data_1[-1, ]
data_1[] <- lapply(data_1, function(x) type.convert(as.character(x)))
#read complication file
data_2<-read.csv(f2)
names(data_2) <- as.matrix(data_2[1, ])
data_2 <- data_2[-1, ]
data_2[] <- lapply(data_2, function(x) type.convert(as.character(x)))
#read demographic file
data_3<-read.csv(f3)
names(data_3) <- as.matrix(data_3[1, ])
data_3 <- data_3[-1, ]
data_3[] <- lapply(data_3, function(x) type.convert(as.character(x)))
#read encounter file
data_4<-read.csv(f4)
names(data_4) <- as.matrix(data_4[1, ])
data_4 <- data_4[-1, ]
data_4[] <- lapply(data_4, function(x) type.convert(as.character(x)))
#read treatment file
data_5<-read.csv(f5, header = FALSE)
names(data_5) <- as.matrix(data_5[1, ])
data_5 <- data_5[-1, ]
data_5[] <- lapply(data_5, function(x) type.convert(as.character(x)))
df<-merge(data_1, data_2, by = "Patient Id")
df1<-merge(df, data_3, by = "Patient Id")
df2<-merge(df1, data_4, by = "Patient Id")
df3<-merge(df2,data_5, by = "Patient Id")
write.csv(df3, file = "mergedT.csv")
merged_inline= read.csv("mergedT.csv", header = TRUE, check.names = F, as.is=TRUE)
missing_df<-merged_inline[,3572:3629]
merged_inline1<-merge(merged_inline,missing_df,by=0)
merged_inline1<-merged_inline1[,c(3:3550,5359:5416,3551:5358)]
write.csv(merged_inline1, file = "mergedf.csv")
merged_inline<-read.csv("mergedF.csv", header = TRUE, check.names = F, as.is=TRUE)
colnames(merged_inline[721])
colnames(merged_inline[5102])
encounters_inline= merged_inline[,721:5160]
pID= merged_inline[2]
encounters= data.frame(pID, encounters_inline, check.names = F)
for (i in 1:4441){
temp= names(encounters[i])
substr(temp, 1,12)
#wildcard search for Treamtment Na to remove Umlaut character
if (grepl(glob2rx("Treatment Na*"),substr(temp, 1,12))){
names(encounters)[i]= "Treatment Naive (Never on Immunosuppression)"
}
}
encountersPID= encounters[,-(2:4)]
encountersNPID= encounters[,-(1:4)]
####### Final Loop - encounter_inline to segmented by date #########################################
patients= encountersNPID[encountersPID$"Patient Id"==1,]
patients1 = patients
encountersComplete= patients1[0,1:103]
l <- list()
h=1
for(j in 1:4437){
# LenName= nchar(names(encountersNPID[j]))
ColName= substr(colnames(encountersNPID[j]),1,13)
if(ColName=="Date Of Visit"){
l[[h]] = j-1
h=h+1
}
}
for(q in 1:682){
print(q)
patients= encountersNPID[encountersPID$"Patient Id"==pID[q,],]
patients1 = patients
i=1
n=1
lines= patients1[0,1:103]
for(j in 1:4437){
if(j %in% l){
n=n+1
i=i+1
# encountersComplete= rbind(encountersComplete, lines)
}
for(m in 1:103){
LenName= nchar(names(lines[m]))
ColName= substr(colnames(patients1[j]),1,LenName)
if(ColName == names(lines[m])){
lines[i,m] = patients1[,j]
break
}
}
}
encountersComplete= rbind(encountersComplete, lines)
}
########################## BIOPSY ############################################
biopsy_inline= merged_inline[,2:87]
biopsy_inline
pID= merged_inline[2]
for (i in 1:86){
temp= names(biopsy_inline[i])
# substr(temp, 1,40)
#wildcard search for Treamtment Na to remove Umlaut character
if (grepl(glob2rx("Number Glomeruli with active vasculitic*"),substr(temp, 1,39))){
names(biopsy_inline)[i]= "Number Glomeruli with active vasculitic lesions(thrombosis + tuft disruption + cells in Bowman's space/crescents))"
}
}
for (i in 1:86){
temp= names(biopsy_inline[i])
# substr(temp, 1,40)
#wildcard search for Treamtment Na to remove Umlaut character
if (grepl(glob2rx("Site of biopsy - othe*"),substr(temp, 1,21))){
names(biopsy_inline)[i]= "Site of other biopsy"
}
}
####### Final Loop - biopsy_inline to segmented by date #########################################
# m=2
biopsyPID= biopsy_inline
biopsyNPID= biopsy_inline[,-(1)]
biop= biopsy_inline[biopsyPID$"Patient Id"==1,]
biop1 = biop
biopsyComplete= biop1[0,1:17]
for(q in 1:682){
biop= biopsyNPID[biopsyPID$"Patient Id"==pID[q,],]
biop1 = biop
i=1
n=1
lines= biop1[0,1:17]
for(j in 1:85){
for(m in 1:17){
LenName= nchar(names(lines[m]))
ColName= substr(colnames(biop1[j]),1,LenName)
if(ColName == names(lines[m])){
lines[i,m] = biop1[,j]
print(j)
#print(m)
break
}
}
if(j==17*n){
n=n+1
i=i+1
}
}
biopsyComplete= rbind(biopsyComplete, lines)
}
############################################################################################
################################ TREAMTMENT ###########################################
treatment_inline= merged_inline[,88:503]
treatment_inline
pID= merged_inline[2]
treatment_inline= data.frame(pID, treatment_inline, check.names = F)
########## Continuous Medication ############
continuous_medication= treatment_inline[,1:239]
####### Final Loop - to get the data segmented by date ###################################
continuous_medicationPID= continuous_medication
continuous_medicationNPID= continuous_medication[,-(1)]
c_med= continuous_medicationNPID[continuous_medicationPID$"Patient Id"==1,]
c_med1 = c_med
c_med_complete= c_med1[0,1:7]
for(q in 1:682){
c_med= continuous_medicationNPID[continuous_medicationPID$"Patient Id"==pID[q,],]
c_med1 = c_med
i=1
n=1
lines= c_med1[0,1:7]
for(j in 1:238){
for(m in 1:7){
LenName= nchar(names(lines[m]))
ColName= substr(colnames(c_med1[j]),1,LenName)
if(ColName == names(lines[m])){
lines[i,m] = c_med1[,j]
print(j)
break
}
}
if(j==7*n){
n=n+1
i=i+1
}
}
c_med_complete= rbind(c_med_complete, lines)
}
##########################################
########## Therapy ############
ivtherapy= treatment_inline[,240:359]
pID= merged_inline[2]
ivtherapy= data.frame(pID, ivtherapy, check.names = F)
for (i in 1:121){
temp= names(ivtherapy[i])
if (grepl(glob2rx("IV therapy - othe*"),substr(temp, 1,17))){
names(ivtherapy)[i]= "Other IV Therapy"
}
}
ivtherapyPID= ivtherapy
ivtherapyNPID= ivtherapy[,-(1)]
iv_t= ivtherapyNPID[ivtherapyPID$"Patient Id"==1,]
iv_t1 = iv_t
iv_t_complete= iv_t1[0,1:5]
for(q in 1:682){
iv_t= ivtherapyNPID[ivtherapyPID$"Patient Id"==pID[q,],]
iv_t1 = iv_t
i=1
n=1
lines= iv_t1[0,1:5]
for(j in 1:120){
for(m in 1:5){
LenName= nchar(names(lines[m]))
ColName= substr(colnames(iv_t1[j]),1,LenName)
if(ColName == names(lines[m])){
lines[i,m] = iv_t1[,j]
print(j)
break
}
}
if(j==5*n){
n=n+1
i=i+1
}
}
iv_t_complete= rbind(iv_t_complete, lines)
}
##########################################
######### Plasma Exchange #######
plexchange= treatment_inline[,360:413]
pID= merged_inline[2]
plexchange= data.frame(pID, plexchange, check.names = F)
plexchangePID= plexchange
plexchangeNPID= plexchange[,-(1)]
pl_ex= plexchangeNPID[plexchangePID$"Patient Id"==1,]
pl_ex1 = pl_ex
pl_ex_complete= iv_t1[0,1:3]
for(q in 1:682){
pl_ex= plexchangeNPID[plexchangePID$"Patient Id"==pID[q,],]
pl_ex1 = pl_ex
i=1
n=1
lines= pl_ex1[0,1:3]
for(j in 1:54){
for(m in 1:5){
LenName= nchar(names(lines[m]))
ColName= substr(colnames(pl_ex1[j]),1,LenName)
if(ColName == names(lines[m])){
lines[i,m] = pl_ex1[,j]
print(j)
break
}
}
if(j==3*n){
n=n+1
i=i+1
}
}
pl_ex_complete= rbind(pl_ex_complete, lines)
}
############################################################################################
###### Add the two columns from treatment - Donor Type and Date of transplant
############################################################################################
############################## Complications ######################################
complications_inline= merged_inline[,5161:5415]
pID= merged_inline[2]
complications_inline= data.frame(pID, complications_inline, check.names = F)
complicationsPID= complications_inline
complicationsNPID= complications_inline[,-(1)]
comp= complicationsNPID[complicationsPID$"Patient Id"==1,]
comp1 = comp
comp_complete= comp1[0,1:11]
for(q in 1:682){
print(q)
comp= complicationsNPID[complicationsPID$"Patient Id"==pID[q,],]
comp1 = comp
i=1
n=1
lines= comp1[0,1:11]
for(j in 1:255){
for(m in 1:11){
LenName= nchar(names(lines[m]))
ColName= substr(colnames(comp1[j]),1,LenName)
# substr(colnames(patients1[104]), 1, 13)
# print(ColName)
if(ColName == names(lines[m])){
lines[i,m] = comp1[,j]
break
}
}
if(j==11*n){
n=n+1
i=i+1
}
}
comp_complete= rbind(comp_complete, lines)
}
############################################################################################
############################ DEMOGRAPHICS #######################################
demographics_inline= merged_inline[,504:720]
pID= merged_inline[2]
# demographics_inline= data.frame(pID, demographics_inline, check.names = F)
colAdd = c("PatientID", "Label")
demographics_inline[ , colAdd] <- NA
demographics_inline["Label"]= "Demographics"
demographics_inline["PatientID"]=pID
demographicsComplete1= demographics_inline[c(218:219,1:217)]
############################################################################################
#################### FINAL EXCEL ##############################################
pID= merged_inline[2]
### ENCOUNTERS
divEncounter= nrow(encountersComplete)/682
encountersComplete1=encountersComplete
colAdd = c("PatientID", "Label")
encountersComplete1[ , colAdd] <- NA
encountersComplete1["Label"]= "Encounters"
encountersComplete1= encountersComplete1[c(104:105,1:103)]
n=1
rangeEncounter= nrow(encountersComplete)
for(i in 1:rangeEncounter){
encountersComplete1[i,1]= pID[n,1]
if(i==n*divEncounter){
n=n+1
}
}
### BIOPSY
divBiopsy= nrow(biopsyComplete)/nrow(biopsy_inline)
biopsyComplete1=biopsyComplete
colAdd = c("PatientID", "Label")
biopsyComplete1[ , colAdd] <- NA
biopsyComplete1["Label"]= "Biopsy"
biopsyComplete1= biopsyComplete1[c(18:19,1:17)]
n=1
rangeBiopsy= nrow(biopsyComplete1)
for(i in 1:rangeBiopsy){
biopsyComplete1[i,1]= pID[n,1]
if(i==n*divBiopsy){
n=n+1
}
}
### ContinuousMedication
div_c_med= nrow(c_med_complete)/nrow(continuous_medication)
c_med_complete1=c_med_complete
colAdd = c("PatientID", "Label")
c_med_complete1[ , colAdd] <- NA
c_med_complete1["Label"]= "Continuous_Medication"
c_med_complete1= c_med_complete1[c(8:9,1:7)]
n=1
rangeCmed= nrow(c_med_complete)
for(i in 1:rangeCmed){
c_med_complete1[i,1]= pID[n,1]
if(i==n*div_c_med){
n=n+1
}
}
### Therapy
ivt_div= nrow(iv_t_complete)/nrow(ivtherapy)
iv_t_complete1=iv_t_complete
colAdd = c("PatientID", "Label")
iv_t_complete1[ , colAdd] <- NA
iv_t_complete1["Label"]= "IV_Therapy"
iv_t_complete1= iv_t_complete1[c(6:7,1:5)]
n=1
rangeCmed= nrow(c_med_complete)
for(i in 1:rangeCmed){
iv_t_complete1[i,1]= pID[n,1]
if(i==n*ivt_div){
n=n+1
}
}
### PlasmaExchange
pl_ex_div= nrow(pl_ex_complete)/nrow(plexchange)
pl_ex_complete1=pl_ex_complete
colAdd = c("PatientID", "Label")
pl_ex_complete1[ , colAdd] <- NA
pl_ex_complete1["Label"]= "Plasma_Exchange"
pl_ex_complete1= pl_ex_complete1[c(4:5,1:3)]
n=1
rangePlex= nrow(pl_ex_complete)
for(i in 1:rangePlex){
pl_ex_complete1[i,1]= pID[n,1]
if(i==n*pl_ex_div){
n=n+1
}
}
### Complications
divComplications= nrow(comp_complete)/nrow(complications_inline)
comp_complete1=comp_complete
colAdd = c("PatientID", "Label")
comp_complete1[ , colAdd] <- NA
comp_complete1["Label"]= "Complications"
comp_complete1= comp_complete1[c(12:13,1:11)]
n=1
rangeComp= nrow(comp_complete)
for(i in 1:rangeComp){
comp_complete1[i,1]= pID[n,1]
if(i==n*divComplications){
n=n+1
}
}
### Changing Dates to Common Dates
names(encountersComplete1)[3]= "Date"
names(biopsyComplete1)[3]= "Date"
names(c_med_complete1)[7]= "Date"
names(iv_t_complete1)[5]= "Date"
names(pl_ex_complete1)[4]= "Date"
names(comp_complete1)[3]= "Date"
names(demographicsComplete1)[205]= "Date"
getwd()
############################################################################################
# File Intergrity check
write.csv(encountersComplete1, file = "enc.csv")
write.csv(biopsyComplete1, file = "bio.csv")
write.csv(c_med_complete1, file = "c_med.csv")
write.csv(iv_t_complete1, file = "iv.csv")
write.csv(pl_ex_complete1, file = "plex.csv")
write.csv(comp_complete1, file = "comp.csv")
write.csv(demographicsComplete1, file = "demo.csv")
############################################################################################
temp1<-smartbind(encountersComplete1,biopsyComplete1,c_med_complete1,iv_t_complete1,pl_ex_complete1,demographicsComplete1,comp_complete1)
temp1 <- temp1[!(temp1$Date == ""),]
temp1 <- temp1[!(is.na(temp1$PatientID)),]
temp1 <- temp1[!(temp1$PatientID=="NA"),]
N <- 2
for(i in 1:N){
temp1<-temp1[-1]
i=i+1
}
write.csv(temp1,file = "bindf.csv")
}
#This function takes the address of the directory and files and passess the address to the main function
address<-function(){
d<-readline(prompt = "enter the working directory.All merged file be saved at this location--")
f1<-readline(prompt = "enter the path of the biopsy file--")
f2<-readline(prompt = "enter the path of the treatment file--")
f3<-readline(prompt = "enter the path of the demograohic file--")
f4<-readline(prompt = "enter the path of the encounter file--")
f5<-readline(prompt = "enter the path of the complication file--")
#function invocation with path address as arguement
myfunction(d,f1,f2,f3,f4,f5)
}
#address()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.