inst/debiApp/DEBiAdmin.r

####
# Author: Martin W. Goros
# UT Health Science Center

dmply <- function(data.,splitby.,function.){
  dlist. <- with(data.,split(data.,splitby.))
  function. <- function.
  elist. <- lapply(dlist.,function.)
  out. <- do.call(rbind.data.frame,elist.)
  row.names(out.) <- 1:nrow(out.)
  return(out.)
}

full_data <- read_redcap(redcap_url=rc_url,secret_token=rc_token)

if(!is.null(full_data)){
  if(is.null(full_data$redcap_repeat_instrument)){
    full_data$redcap_repeat_instrument <- NA
  }
# read projectinfo table
projectinfo <- subset(full_data,select=c('record_id','redcap_repeat_instrument','job_title','job_alias','job_description','job_start_date','job_end_date','assigned_faculty','assigned_staff','pi_name','pi_department','data_source','funding_type','importance','h_index','modifier_job','deadline_date'))
projectinfo <- subset(projectinfo,!(redcap_repeat_instrument %in% c('job_phase','work')))
projectinfo$job_start_date <- as.Date(projectinfo$job_start_date, format = '%Y-%m-%d')
    
requestdf <- subset(full_data,select=c('record_id','redcap_repeat_instrument','requestor2','requestor','requestor_type','requestor_type_other','super_faculty_email','requestor_email','requestor_phone','time_for_contact','requesting_dept','funding_source','funding_source_other','deb_contact_email','project_name','short_description','datasource','datasource_other','project_type','project_type_other','request_date','deadline_date','research_product___1','research_product___2','research_product___3','research_product___4','research_product___5','research_product___6','research_product___7','estimated_hours'))
requestdf <- subset(full_data,!(redcap_repeat_instrument %in% c('job_phase','work')))
    
requestdf$project_name <- as.character(requestdf$project_name)
requestdf$requestor <- as.character(requestdf$requestor)
requestdf$requesting_dept <- as.character(requestdf$requesting_dept)
requestdf$deb_contact_email <- as.character(requestdf$deb_contact_email)
requestdf$funding_source <- as.numeric(as.character(requestdf$funding_source))
requestdf$datasource <- as.character(requestdf$datasource)
requestdf$request_date <- as.character(requestdf$request_date)
requestdf$deadline_date <- as.character(requestdf$deadline_date)
      
# change funding source and data source
requestdf$funding_source <- ifelse(requestdf$funding_source == 1, 'CTSA',
                            ifelse(requestdf$funding_source == 2, 'CTRC',
                            ifelse(requestdf$funding_source == 3, 'Barshop',
                            ifelse(requestdf$funding_source == 4, 'PepperCenter',
                            ifelse(requestdf$funding_source == 5, 'Grant Funded',
                            ifelse(requestdf$funding_source == 6, 'Industry Funded',
                            ifelse(requestdf$funding_source == 7, 'Unknown',
                            ifelse(requestdf$funding_source == 8, 'Not funded',
                            ifelse(requestdf$funding_source == 9, 'Other',
                            ifelse(requestdf$funding_source == 10, 'Stats for Students',
                            ifelse(requestdf$funding_source == 11, 'Pediatrics Fellowship',NA)))))))))))
                                        
requestdf$funding_source <- ifelse((requestdf$funding_source == 'Other') & (requestdf$funding_source_other != ""), as.character(requestdf$funding_source_other), as.character(requestdf$funding_source))
requestdf$binary_funded <- with(requestdf,ifelse((funding_source=="Unknown") | (funding_source=="Other") | (funding_source=="Not funded"),0,1))
requestdf$binary_funded <- ifelse((requestdf$binary_funded==0 & grepl("ctrc",tolower(requestdf$funding_source_other))) | (requestdf$binary_funded==0 & grepl("ctsa",tolower(requestdf$funding_source_other))) | (requestdf$binary_funded==0 & grepl("grant",tolower(requestdf$funding_source_other))),1,requestdf$binary_funded)

requestdf$datasource <- ifelse(requestdf$datasource == 1, 'MS Excel',
                        ifelse(requestdf$datasource == 2, 'Database',
                        ifelse(requestdf$datasource == 3, 'RedCap',
                        ifelse(requestdf$datasource == 4, 'Other',NA))))
requestdf$datasource <- ifelse((requestdf$datasource == 'Other') & (requestdf$datasource != ""), as.character(requestdf$datasource_other), as.character(requestdf$datasource))
     
outcometable <- data.frame(old=1:7,label=c('poster','poster','paper','grant','consult','consult','consult'))
outcome0 <- data.frame(requestdf$research_product___1,requestdf$research_product___2,requestdf$research_product___3,
                       requestdf$research_product___4,requestdf$research_product___5,requestdf$research_product___6,requestdf$research_product___7)
    
requestdf$outcome <- NULL
for(i in 1:nrow(outcome0)){
selection <- which(outcome0[i,]==1)
  if(length(selection) > 0){
    renumbered <- (ifelse(selection == 1, 2, ifelse(selection == 2, 2, ifelse(selection == 3, 1, ifelse(selection == 4, 1, ifelse(selection == 5, 3, ifelse(selection == 6, 3, ifelse(selection == 7, 3, NA))))))))
    highest_goal <- selection[which(renumbered == min(renumbered))][1]
    requestdf$outcome[i] <- as.character(outcometable[which(highest_goal==outcometable$old),]$label)
  }else{
    requestdf$outcome[i] <- NA 
  }
}

# h-index
getWebsite <- function(name){
  url = utils::URLencode(paste0("https://www.google.com/search?q=",name))
  page <- xml2::read_html(url)
   
  results <- page %>% 
  rvest::html_nodes("cite") %>% 
  rvest::html_text()
  result <- results
    
  return(as.character(result))
}
    
get_h_index <- function(first.name,last.name){
  investigator <- paste0(first.name," ",last.name," h-index")
  websites <- data.frame(website = sapply(investigator,getWebsite))
  names(websites) <- "website"
        
  # specifying the url for desired website to be scrapped
  url0 <- grep('uthscsa.influuent.utsystem.edu/en/persons/',as.character(websites$website),v=1)
  url01 <- url0[!grepl('fingerprints',url0)]
  url02 <- url01[!grepl('similar',url01)]
  url03 <- url02[!grepl('projects',url02)]
  url <- url03[!grepl('network',url03)]   
      
  if(length(url)==0){
    hindex <- 0
  }     
  else if(grepl(tolower(last.name),url)){
    # reading the HTML code from the website
    webpage <- xml2::read_html(url)
     
    # using CSS selectors to scrap the rankings section
    rank_data_html <- rvest::html_nodes(webpage,'.increment-counter')
        
    # converting the ranking data to text
    rank_data <- rvest::html_text(rank_data_html)
             
    hindex <- as.numeric(rank_data[2])
  }else{
    hindex <- 0
  } 
  return(hindex)
}
     
# change project name
requestdf$project_name_new <- paste0(requestdf$requestor,"_",requestdf$record_id)
completed_email <- data.frame(Project=requestdf$project_name_new,Email=requestdf$requestor_email)
             
job_phase <- subset(full_data,select=c('record_id','redcap_repeat_instrument','job_phase','job_phase_date','job_phase_comment','modifier_job_phase'))
job_phase <- subset(job_phase,!(redcap_repeat_instrument %in% c('','work')))
         
# check if project id is different from existing
check_record_id <- sort(unique(job_phase$record_id))

new_id <- setdiff(check_record_id,requestdf$record_id)
  if(any(new_id==1)){
    new_id_create <- new_id    
  }else{
    new_id_create <- new_id[!(new_id %in% c(check_record_id))]
  }
complete <- requestdf[requestdf$record_id %in% new_id_create,]$project_survey_complete
    
requestdf$deadline_date <- as.Date(requestdf$deadline_date,format='%Y-%m-%d')
requestdf$request_date <- as.Date(requestdf$request_date,format='%Y-%m-%d')
        
# calculating importance and making new entry in REDCap
if(length(new_id_create)>0){
    
  requestdf$hindex_new <- NA
    for(h_i in new_id_create){
      requestdf$hindex_new[which(requestdf$record_id==h_i)] <- get_h_index(as.character(requestdf[which(requestdf$record_id==h_i),]$requestor2),as.character(requestdf[which(requestdf$record_id==h_i),]$requestor))
    }
           
# you can create formula to calculate importance
requestdf$importance <- NA 
    
requestdf$deadline_date <- as.character(requestdf$deadline_date)

jobstart <- as.character(format(Sys.Date(),format='%m/%d/%Y'))
newproject <- requestdf[requestdf$record_id %in% new_id_create,]$project_name_new
      
for(newpr in new_id_create){
                
  email.not <- requestdf[requestdf$record_id == newpr,]$project_survey == 2
  if(requestdf[requestdf$record_id == newpr,]$project_survey == 2){
        
    oldproject <- requestdf[requestdf$record_id == newpr,]$project_name
    oldproject <- gsub("[[:punct:]]", " ", oldproject)
    descript <- as.character(requestdf[requestdf$record_id == newpr,]$short_description)
        
    ### update job  
    job <- subset(projectinfo,!(record_id == newpr))
    new_jobid <- newpr                                        
    data0 <- data.frame(record_id=new_jobid,
                              job_title=requestdf[requestdf$record_id == newpr,]$project_name_new,
                              job_alias=oldproject,
                              job_description=descript,
                              job_start_date=requestdf[requestdf$record_id == newpr,]$request_date,
                              job_end_date="",
                              assigned_faculty=requestdf[requestdf$record_id == newpr,]$deb_contact_email,
                              assigned_staff="",
                              pi_name=paste0(as.character(requestdf[requestdf$record_id == newpr,]$requestor2)," ",as.character(requestdf[requestdf$record_id == newpr,]$requestor)),
                              pi_department=requestdf[requestdf$record_id == newpr,]$requesting_dept,
                              data_source=requestdf[requestdf$record_id == newpr,]$datasource,
                              funding_type=ifelse(requestdf[requestdf$record_id == newpr,]$funding_source=="Other",as.character(requestdf[requestdf$record_id == newpr,]$funding_source_other),as.character(requestdf[requestdf$record_id == newpr,]$funding_source)),
                              estimated_hours=as.numeric(as.character(requestdf[requestdf$record_id == newpr,]$estimated_hours)),
                              importance=as.numeric(as.character(requestdf[requestdf$record_id == newpr,]$importance)),
                              h_index=as.numeric(as.character(requestdf[requestdf$record_id == newpr,]$hindex_new)),
                              modifier_job='debi',
                              job_complete=2)
    job.OUT <- ParseRtoREDCap(data0)
    postForm(rc_url, data=job.OUT, token=rc_token, content="record", type="flat", format="csv",returnFormat="csv", overwriteBehavior="overwrite",.opts=curlOptions(ssl.verifypeer=FALSE, cainfo=REDCap.crt, verbose=FALSE))
          
    ### update job_phase
    new_jobphaseid <- newpr
    data1 <- data.frame(record_id=new_jobid,
                              redcap_repeat_instrument='job_phase',
                              redcap_repeat_instance=1,
                              job_phase="Intake",
                              job_phase_date=paste0(requestdf[requestdf$record_id == newpr,]$request_date," ",gsub(".* ","",gsub(" CDT","",Sys.time()))),
                              job_phase_comment=oldproject,
                              modifier_job_phase='debi',
                              job_phase_complete=2)
    job_phase.OUT <- ParseRtoREDCap(data1)
    postForm(rc_url, data=job_phase.OUT, token=rc_token, content="record", type="flat", format="csv",returnFormat="csv", overwriteBehavior="overwrite",.opts=curlOptions(ssl.verifypeer=FALSE, cainfo=REDCap.crt, verbose=TRUE))
    }
  }
}  
}           

### END ###
MartinGoros/debi documentation built on May 30, 2019, 10:46 p.m.