R/pvsRequest9.1.R

pvsRequest9.1 <- 
	function (request,inputs) {
    pvs.key <- getPVS_key()  # check "pvs.key" option, then "pvs.key" 
    pvs.url <- paste("http://api.votesmart.org/",request,"key=", pvs.key, inputs, sep="") #generate url for request
		
		httpresp <- GET(url=pvs.url)
		xmltext <- content(x=httpresp, as="text")
		errors <-  getXMLErrors(xmltext) # check if xml can be parsed properly
		
		if (length(errors) != 0) {
			if (names(errors[[1]]$code) == "XML_ERR_CDATA_NOT_FINISHED") { # if not, try to fix 
				xmltext <- gsub(pattern="\003", replacement="", x=xmltext, fixed=TRUE)
			}
		}
		output.base <- xmlRoot(xmlTreeParse(xmltext, useInternalNodes=TRUE))

		if (names(output.base)[1]=="errorMessage") {
			
			# if the requested data is not available, return an empty (NA) data frame and give a warning
			warning(gsub(pattern="&", replacement=" ", x=paste("No data available for: ", inputs,". The corresponding rows in the data frame are filled with NAs.", sep=""), fixed=TRUE), call.=FALSE)
			output.df <- data.frame(matrix(nrow=1,ncol=0), stringsAsFactors = FALSE)
			output.df

		} else {
			
			output <- output.base
			nodenames <- names(output) # get names of nodes
			
			# remove unnecessary child-nodes
			if (nodenames[1]=="generalInfo") {
				output <- removeChildren(output,kids="generalInfo")
				
			} else { 
				if (nodenames[1]=="generalinfo") {
				
				output <- removeChildren(output,kids="generalinfo")
				}
			}
			nodenames <- names(output) # get names of nodes

			# process each relevant subnode of output (e.g., without generalinfo) individually
			# specifically check if subnode has again subnodes with several entries such as stage in the Election.getElection above.Then process each individually
			subnodes.list <- lapply(1:length(nodenames), FUN= function(x) {
				subn <- output[[x]]
				freq.names <- summary(as.factor(names(subn))) # check frequency
				n.subnames <- sapply(names(freq.names), function(y) { length(names(subn[[y]]))}) # check number of subnodes in each element
				
				
				if (sum(n.subnames)==length(n.subnames)) {
					subnode.df <- data.frame(t(xmlSApply(subn, xmlValue)), stringsAsFactors = FALSE)

				} else {
					
					
					# first, extract xml values of subnodes of subnodes (would otherwise generate problematic columns)
					severalsubn <- which(n.subnames>1)
					sevsubs <- lapply (1:length(severalsubn), FUN=function(i) {
						.sevsub <- which(names(subn)==names(severalsubn)[i]) # which subnodes (of subn) have several entries?
						sevsub.list <- lapply(1:length(.sevsub), function(j){ 
							df <-   data.frame(t(xmlSApply(subn[[.sevsub[[j]] ]], xmlValue)), stringsAsFactors = FALSE)
							df.names <- sapply(1:length(names(df)), FUN=function(z) {paste(names(.sevsub[j]),j,".",names(df)[z], sep="")})
							names(df) <- df.names
							df
						}
						)
						sevsub.df <- do.call("cbind", sevsub.list)
					}
					)
					sevsubs.extracted <-  do.call("cbind", sevsubs)
					
					# second, extract all xml values as usual (generates some problematic columns due to subnodes in subnodes)
					# therefore only keep the normal ones:
					df.ok <- data.frame(t(xmlSApply(subn, xmlValue)), stringsAsFactors = FALSE)
					ok.names <- names(subn)[(names(subn)!=names(severalsubn)[1])]
					df.ok <- df.ok[,ok.names]

					# third, cbind the two data frames
					subnode.df <- cbind(df.ok,sevsubs.extracted)
				}
				subnode.df
			})
			output.df <- bind_rows(subnodes.list)
			
			return(output.df)
		}
	}


  
umatter/pvsR documentation built on Jan. 9, 2021, 4:35 p.m.