R/pvsRequest6.1b.R

pvsRequest6.1b <-
	
	function (request,inputs) {
		pvs.key <- getPVS_key()
		pvs.url <- paste("http://api.votesmart.org/",request,"key=",pvs.key,inputs,sep="") #generate url for request
		
		httpresp <- GET(url=pvs.url)
		xmltext <- content(x=httpresp, as="text")
		errors <-  getXMLErrors(xmltext) # check if xml can be parsed properly
		
		if (length(errors) != 0) {
			if (names(errors[[1]]$code) == "XML_ERR_CDATA_NOT_FINISHED") { # if not, try to fix 
				xmltext <- gsub(pattern="\003", replacement="", x=xmltext, fixed=TRUE)
			}
		}
		output.base <- xmlRoot(xmlTreeParse(xmltext, useInternalNodes=TRUE))

		if (names(output.base)[1]=="errorMessage") {
			
			# if the requested data is not available, return an empty (NA) data frame and give a warning
			warning(gsub(pattern="&", replacement=" ", x=paste("No data available for: ", inputs,". The corresponding rows in the data frame are filled with NAs.", sep=""), fixed=TRUE), call.=FALSE)
			output.df <- data.frame(matrix(nrow=1,ncol=0), stringsAsFactors = FALSE)
			output.df

		} else {
			
			output <- output.base
			nodenames <- names(output) # get names of nodes
			
			# remove unnecessary child-nodes
			if (nodenames[1]=="generalInfo") {
				output <- removeChildren(output,kids="generalInfo")
				
			} else { if (nodenames[1]=="generalinfo") {
				output <- removeChildren(output,kids="generalinfo")
				
			}
			}
			
			nodenames <- names(output) # get names of nodes
			freq.names <- summary(as.factor(names(output))) # check frequency, 

			# if there are some nodes with one entry, and others with many, the ones with one contain data that belongs to every single entry of the one
			# with several entries (like invariant variable over different observations, --> macrodata)

			# check if there are several nodes with the same name
			if (sum(freq.names)>length(unique(nodenames))) { 

				freq0 <- unique(names(freq.names[freq.names==0]))
				freq1 <- unique(names(freq.names[freq.names==1]))
				freqh <- unique(names(freq.names[freq.names>1]))
				
				# scrap all data from nodes that come up once, and cbind the resulting dfs
				freq1.list <- lapply(freq1, FUN=function(i) {
					x <- output[[i]]
					i.df <- data.frame(t(xmlSApply(x, xmlValue)), stringsAsFactors = FALSE)
					suppressWarnings(try(if (names(x)=="text")     names(i.df) <- i, silent=TRUE ))
					i.df
				})
				
				#freq1.df <- do.call("cbind", freq1.list)
				freq1.df <- bind_cols(freq1.list)
				

				# remove freq1-nodes from output.
				for (i in freq1)   output <- removeChildren(output,i)

				# now scrape the remaining output
				output.list <- lapply(1:length(names(output)), FUN=function(i) {
					subn <- output[[i]]
					
					# Use a method that is robust to cases, where the subnodes have other subnodes, but only with one child (cagegories in Ratings!)
					subnode.list <- lapply(1:length(names(subn)), function(i) {
						
						if (length(unlist(xmlToList(subn[[i]])))>1) {
							data.frame(t(unlist(xmlToList(subn[[i]]))), stringsAsFactors = FALSE)

						} else {
							val <-  xmlValue(subn[[i]])
							val.df <- data.frame(i=t(val), row.names=NULL, stringsAsFactors = FALSE)
							names(val.df) <- xmlName(subn[[i]])
							val.df
						}
					}
					)
					#subnode.df <- do.call("cbind",subnode.list)
					subnode.df <- bind_cols(subnode.list)
					
					subnode.df
				})

				output2 <- bind_rows(output.list)
				output2 <- cbind(freq1.df,output2)
				output2
				
			} else {
				output <- t(xmlSApply(removeChildren(output.base,kids=1), function(x) xmlSApply(x, xmlValue)))
				output.df <- data.frame(output, row.names=NULL, stringsAsFactors = FALSE)
				output.df
			}
		}
	}
umatter/pvsR documentation built on Jan. 9, 2021, 4:35 p.m.