library(dplyr) library(data.table) # library(devtools) # install_github("phillipf/AddressR") library(AddressR) library(RODBC)
# ---- DHHS cooling towers: build one row per site from the raw export ----
# Column X.1 of the export holds "SITE <n> - <address>" lines and
# "Geocoded: <lat>/<lon>(<accuracy>)" lines; the two sets are paired by
# position, so they must have the same length.
CoolingTowers <- read.csv(
  "N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/050117_DHHSCTSAreadetails.csv",
  stringsAsFactors = FALSE
)

site_lines <- CoolingTowers$X.1[grepl("SITE", CoolingTowers$X.1)]
geocode_lines <- CoolingTowers$X.1[grepl("Geocoded", CoolingTowers$X.1)]

# Fail loudly instead of letting data.frame() recycle a shorter vector.
if (length(site_lines) != length(geocode_lines)) {
  stop(
    "Found ", length(site_lines), " SITE lines but ",
    length(geocode_lines), " Geocoded lines; cannot pair them by position.",
    call. = FALSE
  )
}

# "<lat>/<rest>" -> c("<lat>", "<rest>"); split once and reuse both halves.
lat_rest <- strsplit(sub("Geocoded: ", "", geocode_lines), "/")
# "<lon>(<accuracy>)" -> c("<lon>", "<accuracy>)"); a missing part yields NA.
lon_accuracy <- strsplit(vapply(lat_rest, `[`, character(1), 2L), "[(]")

CoolingTower_Sites <- data.frame(
  SITE = as.numeric(
    gsub("SITE\\s*([0-9]+)\\s*-\\s*(.*)", "\\1", site_lines)
  ),
  LATITUDE = vapply(lat_rest, `[`, character(1), 1L),
  LONGITUDE = vapply(lon_accuracy, `[`, character(1), 1L),
  # Drop the closing bracket left over from "(<accuracy>)".
  ACCURACY = gsub(
    "\\)$", "",
    vapply(lon_accuracy, `[`, character(1), 2L)
  ),
  ADDRESS_STRING = gsub("(SITE\\s*[0-9]+)\\s*-\\s*(.*)", "\\2", site_lines),
  stringsAsFactors = FALSE
)

# Column order matters: the Python step that reads this file with
# header=None takes SITE from column 0 and ADDRESS_STRING from column 4.
write.csv(
  CoolingTower_Sites,
  "N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_unlabelled_DHHS.csv",
  row.names = FALSE
)
#' Paste vectors together, treating NA as empty and tidying separators.
#'
#' Works like `paste(..., sep = sep)` except that NA elements contribute
#' nothing: runs of repeated separators are collapsed to one, a single
#' leading and trailing separator is stripped, and a result that ends up
#' empty is returned as NA.
#'
#' @param ... Vectors to paste element-wise (recycled as in `paste()`).
#' @param sep Separator string; matched literally, not as a regex.
#' @return A character vector, with NA where every part was empty.
paste3 <- function(..., sep = " ") {
  parts <- lapply(list(...), function(x) {
    x[is.na(x)] <- ""
    x
  })
  joined <- do.call(paste, c(parts, list(sep = sep)))

  if (nzchar(sep)) {
    # Collapse doubled separators until none remain. A single pass is not
    # enough: "a, , , b" only shrinks to "a, , b" the first time round.
    doubled <- paste0(sep, sep)
    repeat {
      collapsed <- gsub(doubled, sep, joined, fixed = TRUE)
      if (identical(collapsed, joined)) {
        break
      }
      joined <- collapsed
    }

    # Strip one leading and one trailing separator.
    lead <- startsWith(joined, sep)
    joined[lead] <- substring(joined[lead], nchar(sep) + 1L)
    trail <- endsWith(joined, sep)
    joined[trail] <- substring(
      joined[trail], 1L, nchar(joined[trail]) - nchar(sep)
    )
  }

  is.na(joined) <- joined == ""
  joined
}

# ---- EMIS: assemble a single address string per consumer ----
# The first 6 lines of the export are skipped before the header row.
EMIS <- read.csv(
  "N:/ABR/TradeWasteCustomers/16052017_ActiveInactive.csv",
  stringsAsFactors = FALSE,
  skip = 6
) %>%
  select(
    AG_CONSUMERNUMBER, textbox11, textbox65, textbox43,
    textbox46, textbox49, textbox53, textbox56
  )

# Three space-separated groups joined by ", "; "VIC" is inserted as a
# constant between textbox53 and textbox56.
EMIS$ADDRESS_STRING <- paste3(
  paste3(EMIS$textbox11, EMIS$textbox65, EMIS$textbox43),
  paste3(EMIS$textbox46, EMIS$textbox49),
  paste3(EMIS$textbox53, "VIC", EMIS$textbox56),
  sep = ", "
)

# ADDRESS_STRING is the 9th column (index 8 for the Python reader).
write.csv(
  EMIS,
  "N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_unlabelled_EMIS.csv",
  row.names = FALSE
)
```{python DHHS parserator} import re import GNAF import itertools import operator
import pandas as pd
# header=None: the CSV's own header line is read as data row 0 and is
# dropped again just before writing (see the iloc[1:] below).
unlabelled = pd.read_csv("N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_unlabelled_DHHS.csv", header=None)

# Column 0 is carried through as SITE; column 4 is the address text to parse.
site = unlabelled[0]

unlabelled = unlabelled[4].apply(lambda x: re.sub(r', Australia$', "", x))


def accumulate(l):
    """Merge consecutive items that share a label.

    `l` is an iterable of indexable pairs; items are grouped on element 1
    (only adjacent items are grouped) and the element-0 values of each group
    are joined with a single space.

    Yields:
        (label, joined_text) tuples, in input order.
    """
    it = itertools.groupby(l, operator.itemgetter(1))
    for key, subiter in it:
        yield key, ' '.join(item[0] for item in subiter)


labelled = unlabelled.apply(lambda x: GNAF.parse(x))

labelled = labelled.apply(lambda x: list(accumulate(x)))

cols = ['SITE','ADDRESS_STRING', 'BUILDING_NAME', 'LOT_NUMBER_PREFIX', 'LOT_NUMBER', 'LOT_NUMBER_SUFFIX', 'LEVEL_TYPE', 'LEVEL_NUMBER', 'FLAT_TYPE', 'FLAT_NUMBER_PREFIX', 'FLAT_NUMBER', 'FLAT_NUMBER_SUFFIX', 'NUMBER_FIRST', 'NUMBER_FIRST_SUFFIX', 'NUMBER_LAST', 'STREET_NAME', 'STREET_TYPE_CODE', 'LOCALITY_NAME', 'POSTCODE', 'STATE_ABBREVIATION']

df = pd.DataFrame(columns=cols)

df['SITE'] = site

df['ADDRESS_STRING'] = unlabelled

for i, row in enumerate(labelled):
    for label, text in row:
        # An unknown label used to raise KeyError via df[label]; keep that,
        # because .loc would otherwise silently add a new column.
        if label not in df.columns:
            raise KeyError(label)
        # Single .loc assignment instead of chained df[label][i] = ..., which
        # may write to a temporary copy rather than to df.
        df.loc[i, label] = text

# Drop row 0 (the header line read as data). DataFrame.ix no longer exists;
# with the default integer index, iloc[1:] selects the same rows.
df2 = df.iloc[1:]

df2.to_csv('N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_labelled_DHHS.csv', index=False)
```{python EMIS parserator} import re import GNAF import itertools import operator import pandas as pd unlabelled = pd.read_csv("N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_unlabelled_EMIS.csv", header=None) site = unlabelled[0] unlabelled = unlabelled[8].apply(lambda x: re.sub(r', Australia$', "", x)) def accumulate(l): it = itertools.groupby(l, operator.itemgetter(1)) for key, subiter in it: yield key, ' '.join(item[0] for item in subiter) labelled = unlabelled.apply(lambda x: GNAF.parse(x)) labelled = labelled.apply(lambda x: list(accumulate(x))) cols = ['SITE','ADDRESS_STRING', 'BUILDING_NAME', 'LOT_NUMBER_PREFIX', 'LOT_NUMBER', 'LOT_NUMBER_SUFFIX', 'LEVEL_TYPE', 'LEVEL_NUMBER', 'FLAT_TYPE', 'FLAT_NUMBER_PREFIX', 'FLAT_NUMBER', 'FLAT_NUMBER_SUFFIX', 'NUMBER_FIRST', 'NUMBER_FIRST_SUFFIX', 'NUMBER_LAST', 'STREET_NAME', 'STREET_TYPE_CODE', 'LOCALITY_NAME', 'POSTCODE', 'STATE_ABBREVIATION'] df = pd.DataFrame(columns=cols) df['SITE'] = site df['ADDRESS_STRING'] = unlabelled for i, row in enumerate(labelled): for item in row: df[item[0]][i] = item[1] df2 = df.ix[1:] df2.to_csv('N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_labelled_EMIS.csv', index=False)
# ---- EMIS: export address samples into the parserator data folder ----
# Each export is a headerless, single-column file of ADDRESS_STRING values
# (presumably for manual labelling; the file names say "unlabelled").
labelled_EMIS <- read.csv(
  "N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_labelled_EMIS.csv",
  stringsAsFactors = FALSE
)

# Sample 1: 20% of rows that have a BUILDING_NAME plus 0.5% of rows that
# do not. sample_frac() is random, so the output changes from run to run.
manual_training <- labelled_EMIS %>%
  filter(BUILDING_NAME != "") %>%
  sample_frac(0.2)

manual_training2 <- labelled_EMIS %>%
  filter(BUILDING_NAME == "") %>%
  sample_frac(0.005)

manual_training3 <- rbind(manual_training, manual_training2) %>%
  select(ADDRESS_STRING)

write.table(
  manual_training3,
  "C:/Users/farrelp1/Documents/GNAFAPI/parserator/data/170517_unlabelled_EMIS.csv",
  row.names = FALSE,
  col.names = FALSE
)

# Sample 2: rows whose BUILDING_NAME or FLAT_TYPE matches one of the
# lot / shop / ground-floor style values below.
manual_training <- labelled_EMIS %>%
  filter(
    BUILDING_NAME %in% "LOT" |
      FLAT_TYPE %in% c("PREV", "GR", "G") |
      grepl("SH|GRN|GRD|LOWERGROUND", BUILDING_NAME) |
      grepl("GROUND", FLAT_TYPE)
  ) %>%
  select(ADDRESS_STRING)

write.table(
  manual_training,
  "C:/Users/farrelp1/Documents/GNAFAPI/parserator/data/170517_unlabelled_EMIS2.csv",
  row.names = FALSE,
  col.names = FALSE
)

# Sample 3: rows where a street name (with trailing comma) was labelled as
# the building name.
manual_training <- labelled_EMIS %>%
  filter(
    BUILDING_NAME %in% c(
      "PRINCES HIGHWAY,",
      "WELLINGTON PARADE,",
      "OLYMPIC BOULEVARD,",
      "PARK CRESCENT,",
      "MELBOURNE PLACE,"
    )
  ) %>%
  select(ADDRESS_STRING)

write.table(
  manual_training,
  "C:/Users/farrelp1/Documents/GNAFAPI/parserator/data/170517_unlabelled_EMIS3.csv",
  row.names = FALSE,
  col.names = FALSE
)

# Sample 4: rows whose STREET_NAME starts with a single capital letter
# followed by whitespace.
manual_training <- labelled_EMIS %>%
  filter(grepl("^[A-Z]{1}\\s.*", STREET_NAME)) %>%
  select(ADDRESS_STRING)

write.table(
  manual_training,
  "C:/Users/farrelp1/Documents/GNAFAPI/parserator/data/090617_unlabelled_EMIS4.csv",
  row.names = FALSE,
  col.names = FALSE
)
# ---- G-NAF reference addresses ----
# Open an ODBC connection to the ABR database on server WVdb1devsql; the
# connection string requests a trusted connection.
# NOTE(review): the handle is not closed anywhere in the visible code --
# confirm whether later code still needs it before adding odbcClose(ABR).
ABR <- odbcDriverConnect(connection="Driver={SQL Server Native Client 11.0}; server= WVdb1devsql; database=ABR; uid=CWW\\farrelp1; Trusted_Connection=Yes;")
# Load distinct rows from ADDRESS_VIEW where PRIMARY_SECONDARY is 'P' or NULL.
# Derived columns built by the query:
#   FULL_KEY     first 3 chars of LOCALITY_NAME + first 3 chars of
#                STREET_NAME + 'EVEN'/'ODD', with spaces removed
#   KEY          first 2 chars of STREET_NAME + 'EVEN'/'ODD', spaces removed
#   STREET       STREET_NAME, plus ' ' + STREET_TYPE_CODE when present
#   ADDRESS_TYPE 1 when NUMBER_LAST is NULL, otherwise 2
# 'EVEN' is chosen only when NUMBER_FIRST % 2 = 0 holds; every other row
# falls to the ELSE branch ('ODD') -- presumably including a NULL
# NUMBER_FIRST; verify against the data.
GNAF_address <- sqlQuery(ABR, "SELECT DISTINCT [ADDRESS_DETAIL_PID] ,CASE WHEN [NUMBER_FIRST] % 2 = 0 THEN REPLACE(SUBSTRING([LOCALITY_NAME], 1, 3) + SUBSTRING([STREET_NAME], 1, 3) + 'EVEN', ' ', '') ELSE REPLACE(SUBSTRING([LOCALITY_NAME], 1, 3) + SUBSTRING([STREET_NAME], 1, 3) + 'ODD', ' ', '') END AS [FULL_KEY] ,CASE WHEN [NUMBER_FIRST] % 2 = 0 THEN REPLACE(SUBSTRING([STREET_NAME], 1, 2) + 'EVEN', ' ', '') ELSE REPLACE(SUBSTRING([STREET_NAME], 1, 2) + 'ODD', ' ', '') END AS [KEY] ,[NUMBER_FIRST] ,[NUMBER_LAST] ,[STREET_NAME] ,[STREET_TYPE_CODE] ,CASE WHEN [STREET_TYPE_CODE] IS NULL THEN [STREET_NAME] ELSE [STREET_NAME] + ' ' + [STREET_TYPE_CODE] END AS [STREET] ,[LOCALITY_NAME] ,[POSTCODE] ,[LONGITUDE] ,[LATITUDE] ,CASE WHEN [NUMBER_LAST] IS NULL THEN 1 ELSE 2 END AS ADDRESS_TYPE ,CONFIDENCE ,[PRIMARY_SECONDARY] FROM [ABR].[dbo].[ADDRESS_VIEW] WHERE [PRIMARY_SECONDARY] = 'P' OR [PRIMARY_SECONDARY] IS NULL;", stringsAsFactors=FALSE)
# ---- DHHS: clean the parsed address components and match them to G-NAF ----

#' Normalise one parsed address column.
#'
#' Blank ("") and NA entries become NA. Text fields lose one trailing comma
#' and are upper-cased; numeric fields have commas removed and are converted
#' with as.numeric().
#'
#' @param x A vector (one column of parsed address components).
#' @param field.type Either "text" (default) or "num".
#' @return A character vector for "text", a numeric vector for "num", the
#'   same length as `x`.
text_clean <- function(x, field.type = "text") {
  if (length(field.type) != 1L || !field.type %in% c("text", "num")) {
    stop("field.type must be \"text\" or \"num\"", call. = FALSE)
  }
  blank <- is.na(x) | x == ""
  cleaned <- if (field.type == "text") {
    toupper(gsub(",$", "", x))
  } else {
    as.numeric(gsub(",", "", x))
  }
  cleaned[blank] <- NA
  cleaned
}

labelled_DHHS <- read.csv(
  "N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_labelled_DHHS.csv",
  stringsAsFactors = FALSE
)
DHHS_sites <- read.csv(
  "N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_unlabelled_DHHS.csv",
  stringsAsFactors = FALSE
)

# A purely numeric "suffix" is really the house number: move it across.
# The test is per element; is.numeric() on the column would answer once for
# the whole column and miss numeric values in a mixed (character) column.
indx <- !is.na(labelled_DHHS$NUMBER_FIRST_SUFFIX) &
  grepl("^[0-9]+$", labelled_DHHS$NUMBER_FIRST_SUFFIX)
labelled_DHHS$NUMBER_FIRST[indx] <- labelled_DHHS$NUMBER_FIRST_SUFFIX[indx]
labelled_DHHS$NUMBER_FIRST_SUFFIX[indx] <- NA

# A house number ending in a capital letter ("12A") is split into the
# number and its suffix. Vector-style assignment is a no-op when nothing
# matches, unlike the zero-row df[indx, ]$col <- value form.
indx <- grepl("[A-Z]$", labelled_DHHS$NUMBER_FIRST)
numFirst <- gsub("([0-9]+)[A-Z]", "\\1", labelled_DHHS$NUMBER_FIRST[indx])
suffix <- gsub("([0-9]+)([A-Z])", "\\2", labelled_DHHS$NUMBER_FIRST[indx])
labelled_DHHS$NUMBER_FIRST[indx] <- numFirst
labelled_DHHS$NUMBER_FIRST_SUFFIX[indx] <- suffix

labelled_DHHS <- labelled_DHHS %>%
  mutate(
    BUILDING_NAME = text_clean(BUILDING_NAME),
    LEVEL_TYPE = text_clean(LEVEL_TYPE),
    LEVEL_NUMBER = text_clean(LEVEL_NUMBER, field.type = "num"),
    FLAT_TYPE = text_clean(FLAT_TYPE),
    FLAT_NUMBER_PREFIX = text_clean(FLAT_NUMBER_PREFIX),
    FLAT_NUMBER = text_clean(FLAT_NUMBER, field.type = "num"),
    NUMBER_FIRST = text_clean(NUMBER_FIRST, field.type = "num"),
    NUMBER_LAST = text_clean(NUMBER_LAST, field.type = "num"),
    STREET_NAME = text_clean(STREET_NAME),
    STREET_TYPE_CODE = text_clean(STREET_TYPE_CODE),
    LOCALITY_NAME = text_clean(LOCALITY_NAME),
    POSTCODE = text_clean(POSTCODE, field.type = "num"),
    STATE_ABBREVIATION = text_clean(STATE_ABBREVIATION),
    # Blocking key: first TWO street-name characters + EVEN/ODD, spaces
    # removed, so that it lines up with the KEY column built in the G-NAF
    # SQL query (SUBSTRING(STREET_NAME, 1, 2)). One character could never
    # equal a G-NAF KEY.
    # NOTE(review): apostrophes are stripped here but not in the SQL KEY.
    KEY = gsub(" ", "", paste0(
      # substr(LOCALITY_NAME, 1, 3),
      substr(gsub("'", "", STREET_NAME), 1, 2),
      ifelse(NUMBER_FIRST %% 2 == 0, "EVEN", "ODD")
    )),
    ID = row_number(),
    ADDRESS_TYPE = ifelse(is.na(NUMBER_LAST), 1, 2)
  ) %>%
  left_join(select(DHHS_sites, -ADDRESS_STRING), by = "SITE")

GNAF_address <- GNAF_address %>%
  mutate(PID = as.numeric(gsub("GAVIC", "", ADDRESS_DETAIL_PID)))

# Each file is compiled and its exported functions are loaded into the
# session; the code below calls parallelAddressTotalScore6 and
# parallelAddressTotalScore10.
Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev6.1.cpp")
Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev7.cpp")
Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev9.cpp")
Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev10.cpp")

#' Match each input address to its lowest-scoring G-NAF candidate(s).
#'
#' Candidates are the rows of the global `GNAF_address` whose KEY equals the
#' input row's KEY. Every candidate is scored with
#' parallelAddressTotalScore6() and all candidates tied on the minimum score
#' are kept.
#'
#' @param nonblanks Data frame of cleaned addresses with a KEY column.
#' @param ID Vector of identifiers, one per row of `nonblanks`.
#' @return Data frame with columns id, option (row index into GNAF_address),
#'   score, followed by the matching GNAF_address columns. Inputs with no
#'   candidate are omitted.
Clean2 <- function(nonblanks, ID) {
  # GNAF_address <- data.table::fread("C:/Users/farrelp1/Documents/GNAFAPI/data/May17_GNAF.csv", stringsAsFactors = F)
  #
  # colnames(GNAF_address) <- colnames(data.table::fread("C:/Users/farrelp1/Documents/AddressR/data/May17_GNAF_address.csv", stringsAsFactors = F))[1:15]

  options <- lapply(nonblanks$KEY, function(x) which(x == GNAF_address$KEY))
  names(options) <- ID

  totalscore <- lapply(seq_along(options), function(i) {
    parallelAddressTotalScore6(nonblanks[i, ], GNAF_address[options[[i]], ])
  })
  names(totalscore) <- ID

  options2 <- lapply(seq_along(options), function(i) {
    scores <- totalscore[[i]]
    # Handle "no candidates" explicitly rather than silencing min()'s
    # warning on an empty vector.
    best <- if (length(scores) > 0) {
      which(scores == min(scores))
    } else {
      integer(0)
    }
    cbind(option = options[[i]][best], score = scores[best])
  })
  names(options2) <- names(options)

  has_match <- vapply(options2, function(x) nrow(x) > 0, logical(1))
  options3 <- options2[unname(has_match)]

  # Iterate by position so that duplicated IDs do not all resolve to the
  # first element carrying that name.
  finalresult <- plyr::ldply(seq_along(options3), function(k) {
    cbind(
      id = names(options3)[k],
      options3[[k]],
      GNAF_address[options3[[k]][, 1], ]
    )
  })

  # finalresult <- plyr::ldply(names(options3), function(i) cbind(id = i,options3[[i]], GNAF_address[options2[[i]][,1],])) %>%
  #   mutate(id = as.numeric(as.character(id))) %>%
  #   group_by(id) %>%
  #   filter(CONFIDENCE == max(CONFIDENCE))
  # finalresult <- plyr::ldply(options4, function(x) x %>% filter(CONFIDENCE == max(CONFIDENCE))) #%>%
  # mutate(ID = as.numeric(.id))
  # dup <- finalresult[duplicated(finalresult$.id),]
  # CWW_TradeWaste_ABR <- select(finalresult, AG_CONSUMERNUMBER, ADDRESS_DETAIL_PID) %>% distinct()

  finalresult
}

#' Match each input address to its single best G-NAF candidate.
#'
#' Candidates share the input row's KEY and, when the input has a
#' NUMBER_FIRST, have a NUMBER_FIRST strictly within 50 of it. The candidate
#' with the minimum parallelAddressTotalScore6() score is joined back to the
#' global `GNAF_address`.
#'
#' @param nonblanks Data frame of cleaned addresses; needs KEY, NUMBER_FIRST
#'   and SITE columns.
#' @param ID Unused; results are labelled with `nonblanks$SITE`.
#' @return The matched GNAF_address rows with added `id` and numeric `score`
#'   columns. Inputs without any candidate are omitted.
Clean3 <- function(nonblanks, ID) {
  GNAF_address2 <- data.table::as.data.table(GNAF_address)
  data.table::setkey(GNAF_address2, "KEY")

  # Keying sorts the rows by KEY; SITE labels are taken after the sort so
  # they stay aligned with the scores.
  nonblanks2 <- data.table::as.data.table(nonblanks)
  data.table::setkey(nonblanks2, "KEY")

  totalscore2 <- lapply(seq_len(nrow(nonblanks2)), function(i) {
    if (!is.na(nonblanks2[[i, "NUMBER_FIRST"]])) {
      options <- GNAF_address2[
        GNAF_address2[["KEY"]] == nonblanks2[[i, "KEY"]] &
          GNAF_address2[["NUMBER_FIRST"]] < nonblanks2[[i, "NUMBER_FIRST"]] + 50 &
          GNAF_address2[["NUMBER_FIRST"]] > nonblanks2[[i, "NUMBER_FIRST"]] - 50
      ]
    } else {
      options <- GNAF_address2[GNAF_address2[["KEY"]] == nonblanks2[[i, "KEY"]]]
    }
    result <- parallelAddressTotalScore6(nonblanks2[i, ], options)
    colnames(result) <- options[["ADDRESS_DETAIL_PID"]]
    result
  })
  names(totalscore2) <- nonblanks2[["SITE"]]

  data.table::setkey(GNAF_address2, "ADDRESS_DETAIL_PID")

  # Iterate by position: lookup by name would return the first match for a
  # duplicated SITE.
  finalresult <- plyr::ldply(seq_along(totalscore2), function(k) {
    scores <- totalscore2[[k]]
    if (length(scores) == 0) {
      # No candidates: skip this input instead of touching an undefined
      # `result`.
      return(NULL)
    }
    best <- which.min(scores)
    if (length(best) == 0) {
      # Every score is NA, so there is no minimum to report.
      return(NULL)
    }
    result <- data.table::data.table(
      id = names(totalscore2)[k],
      ADDRESS_DETAIL_PID = names(scores[, best]),
      score = as.numeric(unname(scores[, best]))
    )
    data.table::setkey(result, "ADDRESS_DETAIL_PID")
    GNAF_address2[result]
  })

  finalresult$score <- as.numeric(finalresult$score)
  finalresult
}

# AddressR::AddressClean2
# DHHS_GNAF <- AddressR::AddressClean2(nonblanks=labelled_DHHS, ID=labelled_DHHS$ID)
DHHS_GNAF <- Clean2(nonblanks = labelled_DHHS, ID = labelled_DHHS$SITE)

# for (i in seq_along(options)) {
#
#   r <- parallelAddressTotalScore3(nonblanks[i, ], GNAF_address[options[[i]], ])
#
# }

# test <- parallelAddressTotalScore9(labelled_DHHS[1,], GNAF_address) works but very slow
test <- parallelAddressTotalScore10(labelled_DHHS[1:2, ], GNAF_address)
# ---- EMIS: clean the parsed address components and match them to G-NAF ----

#' Normalise one parsed address column.
#'
#' Blank ("") and NA entries become NA. Text fields lose one trailing comma
#' and are upper-cased; numeric fields have commas removed and are converted
#' with as.numeric().
#'
#' @param x A vector (one column of parsed address components).
#' @param field.type Either "text" (default) or "num".
#' @return A character vector for "text", a numeric vector for "num", the
#'   same length as `x`.
text_clean <- function(x, field.type = "text") {
  if (length(field.type) != 1L || !field.type %in% c("text", "num")) {
    stop("field.type must be \"text\" or \"num\"", call. = FALSE)
  }
  blank <- is.na(x) | x == ""
  cleaned <- if (field.type == "text") {
    toupper(gsub(",$", "", x))
  } else {
    as.numeric(gsub(",", "", x))
  }
  cleaned[blank] <- NA
  cleaned
}

labelled_EMIS <- read.csv(
  "N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_labelled_EMIS.csv",
  stringsAsFactors = FALSE
)
EMIS_sites <- read.csv(
  "N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_unlabelled_EMIS.csv",
  stringsAsFactors = FALSE
)

# The repairs below use vector-style assignment (df$col[indx] <- value),
# which is a harmless no-op when nothing matches. The previous guard,
# if (all(indx != FALSE)), only ran a repair when EVERY row matched, so in
# practice the repairs were skipped.

# 1. A purely numeric "suffix" is really the house number: move it across.
#    The test is per element, not is.numeric() on the whole column.
indx <- !is.na(labelled_EMIS$NUMBER_FIRST_SUFFIX) &
  grepl("^[0-9]+$", labelled_EMIS$NUMBER_FIRST_SUFFIX)
labelled_EMIS$NUMBER_FIRST[indx] <- labelled_EMIS$NUMBER_FIRST_SUFFIX[indx]
labelled_EMIS$NUMBER_FIRST_SUFFIX[indx] <- NA

# 2. A house number ending in a capital letter ("12A") is split into the
#    number and its suffix.
indx <- grepl("[A-Z]$", labelled_EMIS$NUMBER_FIRST)
numFirst <- gsub("([0-9]+)[A-Z]", "\\1", labelled_EMIS$NUMBER_FIRST[indx])
suffix <- gsub("([0-9]+)([A-Z])", "\\2", labelled_EMIS$NUMBER_FIRST[indx])
labelled_EMIS$NUMBER_FIRST[indx] <- numFirst
labelled_EMIS$NUMBER_FIRST_SUFFIX[indx] <- suffix

# 3. A street name that starts with a single capital letter and whitespace
#    gives that letter to NUMBER_FIRST_SUFFIX and keeps the remainder.
indx <- grepl("^[A-Z]{1}\\s.*", labelled_EMIS$STREET_NAME)
suffix <- gsub("^([A-Z]{1})\\s(.*)", "\\1", labelled_EMIS$STREET_NAME[indx])
streetname <- gsub("^([A-Z]{1})\\s(.*)", "\\2", labelled_EMIS$STREET_NAME[indx])
labelled_EMIS$NUMBER_FIRST_SUFFIX[indx] <- suffix
labelled_EMIS$STREET_NAME[indx] <- streetname

# 4. Abbreviate a leading "MOUNT " to "MT ".
indx <- grepl("^MOUNT\\s.*", labelled_EMIS$STREET_NAME)
labelled_EMIS$STREET_NAME[indx] <- gsub(
  "^(MOUNT)", "MT", labelled_EMIS$STREET_NAME[indx]
)

labelled_EMIS <- labelled_EMIS %>%
  mutate(
    BUILDING_NAME = text_clean(BUILDING_NAME),
    LEVEL_TYPE = text_clean(LEVEL_TYPE),
    LEVEL_NUMBER = text_clean(LEVEL_NUMBER, field.type = "num"),
    FLAT_TYPE = text_clean(FLAT_TYPE),
    FLAT_NUMBER_PREFIX = text_clean(FLAT_NUMBER_PREFIX),
    FLAT_NUMBER = text_clean(FLAT_NUMBER, field.type = "num"),
    NUMBER_FIRST = text_clean(NUMBER_FIRST, field.type = "num"),
    NUMBER_LAST = text_clean(NUMBER_LAST, field.type = "num"),
    STREET_NAME = text_clean(STREET_NAME),
    STREET_TYPE_CODE = text_clean(STREET_TYPE_CODE),
    LOCALITY_NAME = text_clean(LOCALITY_NAME),
    POSTCODE = text_clean(POSTCODE, field.type = "num"),
    STATE_ABBREVIATION = text_clean(STATE_ABBREVIATION),
    # Blocking key: first two street-name characters (apostrophes
    # removed) + EVEN/ODD from NUMBER_FIRST, with spaces stripped.
    KEY = gsub(" ", "", paste0(
      # substr(LOCALITY_NAME, 1, 3),
      substr(gsub("'", "", STREET_NAME), 1, 2),
      ifelse(NUMBER_FIRST %% 2 == 0, "EVEN", "ODD")
    )),
    ID = row_number(),
    ADDRESS_TYPE = ifelse(is.na(NUMBER_LAST), 1, 2)
  ) # %>%
# left_join(select(DHHS_sites, -ADDRESS_STRING), by = "SITE")

# Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev6.cpp")

# Add LATITUDE/LONGITUDE as all-NA numeric columns.
labelled_EMIS$LATITUDE <- rep(NA_real_, nrow(labelled_EMIS))
labelled_EMIS$LONGITUDE <- rep(NA_real_, nrow(labelled_EMIS))

EMIS_GNAF <- Clean3(nonblanks = labelled_EMIS, ID = labelled_EMIS$SITE)
#include <Rcpp.h>

// Returns 1 / (smaller / larger) for two integers, i.e. the larger value
// expressed as a multiple of the smaller one. When x == y the result is
// 1 / (x / x). NA inputs are not special-cased: an earlier NA_INTEGER check
// is disabled in the original and is kept disabled here.
// [[Rcpp::export]]
double diff(int x, int y) {
  const bool x_is_larger = x > y;
  const double larger = x_is_larger ? double(x) : double(y);
  const double smaller = x_is_larger ? double(y) : double(x);
  return 1 / (smaller / larger);
}
#include <Rcpp.h>

// Returns 75 / 78 computed in double precision. Writing the literals as
// plain ints (75 / 78) would be integer division and yield 0.
// [[Rcpp::export]]
double divide() {
  const double numerator = 75;
  const double denominator = 78;
  return numerator / denominator;
}
#include <Rcpp.h>

// Flags the rows of `x` whose KEY column equals the first element of `key`.
//
// x    data frame with a character column named "KEY"
// key  character vector; only key[0] is used
//
// Returns a logical vector with one entry per row of `x`.
// Stops with an R error when `key` is empty instead of reading key[0] out
// of bounds.
// [[Rcpp::export]]
Rcpp::LogicalVector test(Rcpp::DataFrame x, Rcpp::CharacterVector key) {
  if (key.size() == 0) {
    Rcpp::stop("'key' must contain at least one string");
  }

  Rcpp::CharacterVector sub = x["KEY"];
  char* level = key[0];

  Rcpp::LogicalVector ind(sub.size());
  for (int i = 0; i < sub.size(); i++) {
    ind[i] = (sub[i] == level);
  }

  return ind;
  // return x["STREET_NAME"][ind];
}
#include <Rcpp.h>

// Experimental filter: builds the subset of `x` whose KEY equals key[0] and
// then returns a STREET_NAME vector.
//
// x    data frame with columns KEY, STREET_NAME, LOCALITY_NAME,
//      NUMBER_FIRST, NUMBER_LAST, STREET_TYPE_CODE, LONGITUDE, LATITUDE,
//      POSTCODE
// key  character vector; only key[0] is used
//
// Returns the (empty) STREET_NAME column of the subset when no row matches,
// otherwise the STREET_NAME column of the whole input `x`.
// NOTE(review): returning all of x's street names on a match looks like
// scaffolding rather than the intended result -- confirm before relying on it.
// Stops with an R error when `key` is empty instead of reading key[0] out
// of bounds.
// [[Rcpp::export]]
Rcpp::CharacterVector test2(Rcpp::DataFrame x, Rcpp::CharacterVector key) {
  if (key.size() == 0) {
    Rcpp::stop("'key' must contain at least one string");
  }

  Rcpp::CharacterVector sub = x["KEY"];
  char* level = key[0];

  // Row mask: TRUE where KEY equals the requested key.
  Rcpp::LogicalVector ind(sub.size());
  for (int i = 0; i < sub.size(); i++) {
    ind[i] = (sub[i] == level);
  }

  Rcpp::CharacterVector STREET_NAME = x["STREET_NAME"];
  Rcpp::CharacterVector LOCALITY_NAME = x["LOCALITY_NAME"];
  Rcpp::NumericVector NUMBER_FIRST = x["NUMBER_FIRST"];
  Rcpp::NumericVector NUMBER_LAST = x["NUMBER_LAST"];
  Rcpp::CharacterVector STREET_TYPE_CODE = x["STREET_TYPE_CODE"];
  Rcpp::NumericVector LONGITUDE = x["LONGITUDE"];
  Rcpp::NumericVector LATITUDE = x["LATITUDE"];
  Rcpp::NumericVector POSTCODE = x["POSTCODE"];

  // return(ind);
  // (An earlier variant returned the DataFrame built below directly.)

  Rcpp::DataFrame y = Rcpp::DataFrame::create(
      Rcpp::Named("STREET_NAME") = STREET_NAME[ind],
      Rcpp::Named("LOCALITY_NAME") = LOCALITY_NAME[ind],
      Rcpp::Named("NUMBER_FIRST") = NUMBER_FIRST[ind],
      Rcpp::Named("NUMBER_LAST") = NUMBER_LAST[ind],
      Rcpp::Named("STREET_TYPE_CODE") = STREET_TYPE_CODE[ind],
      Rcpp::Named("LONGITUDE") = LONGITUDE[ind],
      Rcpp::Named("LATITUDE") = LATITUDE[ind],
      Rcpp::Named("POSTCODE") = POSTCODE[ind]);

  Rcpp::CharacterVector z = y["STREET_NAME"];
  int len = z.size();

  if (len == 0) {
    return y["STREET_NAME"];
  } else {
    return x["STREET_NAME"];
  }
}
#include <Rcpp.h>
#include <algorithm>
using namespace Rcpp;

// Returns, for each element of the sorted copy of `x`, its 1-based position
// in `x` (via match()). Emits an R warning when `x` contains duplicates,
// because match() then always reports the first occurrence.
// [[Rcpp::export]]
IntegerVector order_(NumericVector x) {
  if (is_true(any(duplicated(x)))) {
    Rf_warning("There are duplicates in 'x'; order not guaranteed to match that of R's base::order");
  }
  NumericVector sorted = clone(x).sort();
  return match(sorted, x);
}

// Returns a sorted copy of `v` in ascending order.
// The input is cloned first: sorting `v` directly would reorder the
// caller's R vector in place, since a double vector is passed without a
// copy.
// [[Rcpp::export]]
NumericVector sortIt(NumericVector v) {
  NumericVector sorted = clone(v);
  std::sort(sorted.begin(), sorted.end());
  return sorted;
}

// Returns a length-10 vector holding the first elements of `x`.
// At most min(10, length(x)) values are copied; the previous fixed loop of
// 100 iterations wrote past the end of the 10-element result and read past
// the end of shorter inputs. Slots that are not copied keep the value the
// NumericVector(10) constructor gave them.
// [[Rcpp::export]]
NumericVector in_range(NumericVector x) {
  NumericVector out = NumericVector(10);
  const R_xlen_t n = std::min<R_xlen_t>(out.size(), x.size());
  for (R_xlen_t i = 0; i < n; i++) {
    out[i] = x[i];
  }
  return out;
}
#include <Rcpp.h>
using namespace Rcpp;

// Returns the 0-based position of the first smallest element of `x`, as a
// double. An empty vector yields 0. Elements are compared with `<` only, so
// a later element replaces the current best only when strictly smaller.
// [[Rcpp::export]]
double which_min(Rcpp::NumericVector x) {
  const R_xlen_t n = x.size();
  R_xlen_t best = 0;
  for (R_xlen_t i = 1; i < n; ++i) {
    if (x[i] < x[best]) {
      best = i;
    }
  }
  return static_cast<double>(best);
}
# ---- Scratch calls exercising the Rcpp helpers ----
# 50 random draws (with replacement) from 20, 25, ..., 50.
t <- sample(seq(20, 50, by = 5), 50, replace = TRUE)

t2 <- order_(t)
t3 <- in_range(sortIt(t))

nrow(test2(GNAF_address, "C"))

# Fresh sample for the which_min() check.
t <- sample(seq(20, 50, by = 5), 50, replace = TRUE)
which_min(t)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.