library(dplyr)
library(data.table)

# library(devtools)
# install_github("phillipf/AddressR")

library(AddressR)
library(RODBC)
CoolingTowers <- read.csv('N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/050117_DHHSCTSAreadetails.csv',
                          stringsAsFactors=FALSE)

CoolingTower_Sites <- data.frame(SITE = CoolingTowers$X.1[grepl("SITE", CoolingTowers$X.1)],
                                 Geocode = CoolingTowers$X.1[grepl("Geocoded", CoolingTowers$X.1)]) %>%
                      mutate(x = sapply(strsplit(sub("Geocoded: ", "", Geocode), "/"), "[", 1),
                             coord = sapply(strsplit(sub("Geocoded: ", "", Geocode), "/"), "[", 2)) %>%
                      transmute(SITE,
                                LATITUDE = unname(x),
                                LONGITUDE =  sapply(
                                            sapply(coord, strsplit, '[(]'),
                                            "[", 1),
                                ACCURACY = sapply(sapply(coord, strsplit, '[(]'),
                                                  "[", 2)) %>%
                      mutate(ADDRESS_STRING = gsub("(SITE\\s*[0-9]+)\\s*-\\s*(.*)","\\2",SITE),
                             SITE =  as.numeric(gsub("SITE\\s*([0-9]+)\\s*-\\s*(.*)","\\1",SITE)),
                             ACCURACY = gsub("\\)$", "", ACCURACY))

write.csv(CoolingTower_Sites, 
          'N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_unlabelled_DHHS.csv', 
          row.names = F)
paste3 <- function(...,sep=" ") {
     L <- list(...)
     L <- lapply(L,function(x) {x[is.na(x)] <- ""; x})
     ret <-gsub(paste0("(^",sep,"|",sep,"$)"),"",
                 gsub(paste0(sep,sep),sep,
                      do.call(paste,c(L,list(sep=sep)))))
     is.na(ret) <- ret==""
     ret
     }

EMIS <- read.csv('N:/ABR/TradeWasteCustomers/16052017_ActiveInactive.csv',
                          stringsAsFactors=FALSE,
                 skip=6) %>%
        select(AG_CONSUMERNUMBER, textbox11, textbox65, textbox43, textbox46, textbox49, textbox53, textbox56)

EMIS$ADDRESS_STRING <- paste3(paste3(EMIS$textbox11, EMIS$textbox65, EMIS$textbox43), paste3(EMIS$textbox46, EMIS$textbox49), paste3(EMIS$textbox53, "VIC", EMIS$textbox56), sep = ", ") 

write.csv(EMIS, "N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_unlabelled_EMIS.csv",
          row.names = F)

```{python DHHS parserator} import re import GNAF import itertools import operator

import pandas as pd

unlabelled = pd.read_csv("N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_unlabelled_DHHS.csv", header=None)

site = unlabelled[0]

unlabelled = unlabelled[4].apply(lambda x: re.sub(r', Australia$', "", x))

def accumulate(l): it = itertools.groupby(l, operator.itemgetter(1)) for key, subiter in it: yield key, ' '.join(item[0] for item in subiter)

labelled = unlabelled.apply(lambda x: GNAF.parse(x))

labelled = labelled.apply(lambda x: list(accumulate(x)))

cols = ['SITE','ADDRESS_STRING', 'BUILDING_NAME', 'LOT_NUMBER_PREFIX', 'LOT_NUMBER', 'LOT_NUMBER_SUFFIX', 'LEVEL_TYPE', 'LEVEL_NUMBER', 'FLAT_TYPE', 'FLAT_NUMBER_PREFIX', 'FLAT_NUMBER', 'FLAT_NUMBER_SUFFIX', 'NUMBER_FIRST', 'NUMBER_FIRST_SUFFIX', 'NUMBER_LAST', 'STREET_NAME', 'STREET_TYPE_CODE', 'LOCALITY_NAME', 'POSTCODE', 'STATE_ABBREVIATION']

df = pd.DataFrame(columns=cols)

df['SITE'] = site

df['ADDRESS_STRING'] = unlabelled

for i, row in enumerate(labelled): for item in row: df[item[0]][i] = item[1]

df2 = df.ix[1:]

df2.to_csv('N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_labelled_DHHS.csv', index=False)

```{python EMIS parserator}
import re
import GNAF
import itertools
import operator

import pandas as pd

unlabelled = pd.read_csv("N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_unlabelled_EMIS.csv", header=None)

site = unlabelled[0]

unlabelled = unlabelled[8].apply(lambda x: re.sub(r', Australia$', "", x))

def accumulate(l):
    it = itertools.groupby(l, operator.itemgetter(1))
    for key, subiter in it:
       yield key, ' '.join(item[0] for item in subiter)

labelled = unlabelled.apply(lambda x: GNAF.parse(x))

labelled = labelled.apply(lambda x: list(accumulate(x)))

cols = ['SITE','ADDRESS_STRING', 'BUILDING_NAME', 'LOT_NUMBER_PREFIX', 'LOT_NUMBER', 'LOT_NUMBER_SUFFIX', 'LEVEL_TYPE', 'LEVEL_NUMBER', 'FLAT_TYPE', 'FLAT_NUMBER_PREFIX', 'FLAT_NUMBER', 'FLAT_NUMBER_SUFFIX',
        'NUMBER_FIRST', 'NUMBER_FIRST_SUFFIX',  'NUMBER_LAST', 'STREET_NAME', 'STREET_TYPE_CODE', 'LOCALITY_NAME',
        'POSTCODE', 'STATE_ABBREVIATION']

df = pd.DataFrame(columns=cols)

df['SITE'] = site

df['ADDRESS_STRING'] = unlabelled

for i, row in enumerate(labelled):
    for item in row:
        df[item[0]][i] = item[1]

df2 = df.ix[1:]

df2.to_csv('N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_labelled_EMIS.csv', index=False)
labelled_EMIS <- read.csv("N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_labelled_EMIS.csv", stringsAsFactors = F)


manual_training <- labelled_EMIS %>%
                   filter(BUILDING_NAME != "") %>%
                   sample_frac(0.2)

manual_training2 <- labelled_EMIS %>%
                   filter(BUILDING_NAME == "") %>%
                   sample_frac(0.005)

manual_training3 <- rbind(manual_training, manual_training2) %>%
                    select(ADDRESS_STRING)

write.table(manual_training3, "C:/Users/farrelp1/Documents/GNAFAPI/parserator/data/170517_unlabelled_EMIS.csv",
          row.names = F, col.names = F)

manual_training <- labelled_EMIS %>%
                   filter(BUILDING_NAME == "LOT" |
                            FLAT_TYPE == "PREV" |
                            grepl("SH", BUILDING_NAME) |
                            grepl("GRN", BUILDING_NAME) |
                            grepl("GRD", BUILDING_NAME) |
                            grepl("LOWERGROUND", BUILDING_NAME) |
                            grepl("GROUND", FLAT_TYPE) |
                            FLAT_TYPE == "GR" |
                            FLAT_TYPE == "G") %>%
                     select(ADDRESS_STRING)

write.table(manual_training, "C:/Users/farrelp1/Documents/GNAFAPI/parserator/data/170517_unlabelled_EMIS2.csv",
          row.names = F, col.names = F)

manual_training <- labelled_EMIS %>%
                   filter(BUILDING_NAME == "PRINCES HIGHWAY," |
                          BUILDING_NAME == "WELLINGTON PARADE," |
                          BUILDING_NAME == "OLYMPIC BOULEVARD," |
                          BUILDING_NAME == "PARK CRESCENT," |
                          BUILDING_NAME == "MELBOURNE PLACE,") %>%
                     select(ADDRESS_STRING)

write.table(manual_training, "C:/Users/farrelp1/Documents/GNAFAPI/parserator/data/170517_unlabelled_EMIS3.csv",
          row.names = F, col.names = F)

manual_training <- labelled_EMIS %>%
                   filter(grepl("^[A-Z]{1}\\s.*", STREET_NAME)) %>%
                   select(ADDRESS_STRING)

write.table(manual_training, "C:/Users/farrelp1/Documents/GNAFAPI/parserator/data/090617_unlabelled_EMIS4.csv",
          row.names = F, col.names = F)
ABR <- odbcDriverConnect(connection="Driver={SQL Server Native Client 11.0};
                                    server= WVdb1devsql;
                                    database=ABR;
                                    uid=CWW\\farrelp1;
                                    Trusted_Connection=Yes;")

GNAF_address <- sqlQuery(ABR,
                "SELECT DISTINCT 
                  [ADDRESS_DETAIL_PID]
                ,CASE
                WHEN [NUMBER_FIRST] % 2 = 0
                THEN REPLACE(SUBSTRING([LOCALITY_NAME], 1, 3) + SUBSTRING([STREET_NAME], 1, 3) + 'EVEN', ' ', '')
                ELSE REPLACE(SUBSTRING([LOCALITY_NAME], 1, 3) + SUBSTRING([STREET_NAME], 1, 3) + 'ODD', ' ', '')
                END AS [FULL_KEY]
                ,CASE
                WHEN [NUMBER_FIRST] % 2 = 0
                THEN REPLACE(SUBSTRING([STREET_NAME], 1, 2) + 'EVEN', ' ', '')
                ELSE REPLACE(SUBSTRING([STREET_NAME], 1, 2) + 'ODD', ' ', '')
                END AS [KEY]
                ,[NUMBER_FIRST]
                ,[NUMBER_LAST]
                ,[STREET_NAME]
                ,[STREET_TYPE_CODE]
                ,CASE
                WHEN [STREET_TYPE_CODE] IS NULL
                THEN [STREET_NAME]
                ELSE [STREET_NAME] + ' ' + [STREET_TYPE_CODE]
                END AS [STREET]
                ,[LOCALITY_NAME]
                ,[POSTCODE]
                ,[LONGITUDE]
                ,[LATITUDE]
                ,CASE
                WHEN [NUMBER_LAST] IS NULL
                THEN 1
                ELSE 2
                END AS ADDRESS_TYPE
                ,CONFIDENCE
                ,[PRIMARY_SECONDARY]
                FROM [ABR].[dbo].[ADDRESS_VIEW]
                WHERE [PRIMARY_SECONDARY] = 'P' OR [PRIMARY_SECONDARY] IS NULL;",
                stringsAsFactors=FALSE)
text_clean <- function(x, field.type = "text") {

  if(x == "" | is.na(x)) {

    return(NA)

  }

  else if (field.type == "text"){

    x <- gsub(",$","",x)

    return(toupper(x))

  }

  else if (field.type == "num"){

    return(as.numeric(gsub(",","",x)))

  }

}

text_clean <- Vectorize(text_clean) 

labelled_DHHS <- read.csv("N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_labelled_DHHS.csv", stringsAsFactors = F)

DHHS_sites <- read.csv("N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/5. Cooling Towers/160517_unlabelled_DHHS.csv", stringsAsFactors = F)

indx <- !is.na(labelled_DHHS$NUMBER_FIRST_SUFFIX) & is.numeric(labelled_DHHS$NUMBER_FIRST_SUFFIX)

labelled_DHHS[indx,]$NUMBER_FIRST <- labelled_DHHS[indx,]$NUMBER_FIRST_SUFFIX

labelled_DHHS[indx,]$NUMBER_FIRST_SUFFIX <- NA

indx <- grepl("[A-Z]$", labelled_DHHS$NUMBER_FIRST)

numFirst <- gsub("([0-9]+)[A-Z]", "\\1", labelled_DHHS[indx,]$NUMBER_FIRST)  

suffix <- gsub("([0-9]+)([A-Z])", "\\2", labelled_DHHS[indx,]$NUMBER_FIRST)

labelled_DHHS[indx,]$NUMBER_FIRST <- numFirst

labelled_DHHS[indx,]$NUMBER_FIRST_SUFFIX <- suffix

labelled_DHHS <- labelled_DHHS %>%
                 mutate(BUILDING_NAME = text_clean(BUILDING_NAME),
                        LEVEL_TYPE = text_clean(LEVEL_TYPE),
                        LEVEL_NUMBER = text_clean(LEVEL_NUMBER, field.type = "num"),
                        FLAT_TYPE = text_clean(FLAT_TYPE),
                        FLAT_NUMBER_PREFIX = text_clean(FLAT_NUMBER_PREFIX),
                        FLAT_NUMBER = text_clean(FLAT_NUMBER, field.type = "num"),
                        NUMBER_FIRST = text_clean(NUMBER_FIRST, field.type = "num"),
                        NUMBER_LAST = text_clean(NUMBER_LAST, field.type = "num"),
                        STREET_NAME = text_clean(STREET_NAME),
                        STREET_TYPE_CODE = text_clean(STREET_TYPE_CODE),
                        LOCALITY_NAME = text_clean(LOCALITY_NAME),
                        POSTCODE = text_clean(POSTCODE, field.type = "num"),
                        STATE_ABBREVIATION = text_clean(STATE_ABBREVIATION),
                        KEY = gsub(" ", "", paste0(#substr(LOCALITY_NAME, 1,3), 
                                     substr(gsub("'","", STREET_NAME), 1,1),
                                     ifelse(NUMBER_FIRST %% 2 == 0, "EVEN", "ODD"))),
                        ID = row_number(),
                        ADDRESS_TYPE = ifelse(is.na(NUMBER_LAST), 1, 2)) %>%
                        left_join(select(DHHS_sites, -ADDRESS_STRING), by = "SITE")

GNAF_address <- GNAF_address %>%
                mutate(PID = as.numeric(gsub("GAVIC", "", ADDRESS_DETAIL_PID)))


Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev6.1.cpp")
Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev7.cpp")
Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev9.cpp")
Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev10.cpp")

Clean2 <- function(nonblanks, ID) {

  # GNAF_address <- data.table::fread("C:/Users/farrelp1/Documents/GNAFAPI/data/May17_GNAF.csv", stringsAsFactors = F)
  # 
  # colnames(GNAF_address) <- colnames(data.table::fread("C:/Users/farrelp1/Documents/AddressR/data/May17_GNAF_address.csv", stringsAsFactors = F))[1:15]

  options <- lapply(nonblanks$KEY, function(x) which(x == GNAF_address$KEY))

  names(options) <- ID

  totalscore <- lapply(seq_along(options), function(i) parallelAddressTotalScore6(nonblanks[i,], GNAF_address[options[[i]],]))

  names(totalscore) <- ID

  options2 <- lapply(seq_along(options), function(i) cbind(option = suppressWarnings(options[[i]][which(totalscore[[i]] == min(totalscore[[i]]))]), 
                                                           score = suppressWarnings(totalscore[[i]][which(totalscore[[i]] == min(totalscore[[i]]))])))

  names(options2) <- names(options)

  options3 <- options2[unname(unlist(lapply(options2, function(x) nrow(x) > 0)))]

  finalresult <- plyr::ldply(names(options3), function(i) cbind(id = i,options3[[i]], GNAF_address[options2[[i]][,1],]))

  # finalresult <- plyr::ldply(names(options3), function(i) cbind(id = i,options3[[i]], GNAF_address[options2[[i]][,1],])) %>%
  #                 mutate(id = as.numeric(as.character(id))) %>%
  #                 group_by(id) %>%
  #                 filter(CONFIDENCE == max(CONFIDENCE))

  #finalresult <- plyr::ldply(options4, function(x) x %>% filter(CONFIDENCE == max(CONFIDENCE))) #%>%
                 #mutate(ID = as.numeric(.id))

  #dup <- finalresult[duplicated(finalresult$.id),]

  #CWW_TradeWaste_ABR <- select(finalresult, AG_CONSUMERNUMBER, ADDRESS_DETAIL_PID) %>% distinct()

  return(finalresult)

}

Clean3 <- function(nonblanks, ID) {

  GNAF_address2 <- data.table::as.data.table(GNAF_address)

  setkey(GNAF_address2, "KEY")

  nonblanks2 <- data.table::as.data.table(nonblanks)

  setkey(nonblanks2, "KEY")

  totalscore2 <- lapply(1:nrow(nonblanks2), function(i) {

    if(!is.na(nonblanks2[[i, "NUMBER_FIRST"]])) {
    options <- GNAF_address2[GNAF_address2[["KEY"]] == nonblanks2[[i, "KEY"]] & GNAF_address2[["NUMBER_FIRST"]] < nonblanks2[[i, "NUMBER_FIRST"]] + 50 & GNAF_address2[["NUMBER_FIRST"]] > nonblanks2[[i, "NUMBER_FIRST"]] - 50]
    }

    else {

      options <- GNAF_address2[GNAF_address2[["KEY"]] == nonblanks2[[i, "KEY"]]]

    }


    result <- parallelAddressTotalScore6(nonblanks2[i,], options)

    colnames(result) = options[["ADDRESS_DETAIL_PID"]]

    return(result)
    })

  names(totalscore2) <- nonblanks2[["SITE"]]

  setkey(GNAF_address2, "ADDRESS_DETAIL_PID")

  finalresult <- plyr::ldply(names(totalscore2), function(i) {

    if(length(totalscore2[[i]]) > 0) {

      ADDRESS_DETAIL_PID <- names(totalscore2[[i]][,which.min(totalscore2[[i]])])

      score <- as.numeric(unname(totalscore2[[i]][,which.min(totalscore2[[i]])]))

      result <- as.data.table(cbind(id = i, ADDRESS_DETAIL_PID = ADDRESS_DETAIL_PID, score = score))

    }

    setkey(result, "ADDRESS_DETAIL_PID")

    GNAF_address2[result]

    })

  finalresult$score <- as.numeric(finalresult$score)

  return(finalresult)

}

#AddressR::AddressClean2
#DHHS_GNAF <- AddressR::AddressClean2(nonblanks=labelled_DHHS, ID=labelled_DHHS$ID)

DHHS_GNAF <- Clean2(nonblanks=labelled_DHHS, ID=labelled_DHHS$SITE)

# for (i in seq_along(options)) {
#   
#   r <- parallelAddressTotalScore3(nonblanks[i, ], GNAF_address[options[[i]], ])
#   
# }

# test <- parallelAddressTotalScore9(labelled_DHHS[1,], GNAF_address) works but very slow
test <- parallelAddressTotalScore10(labelled_DHHS[1:2,], GNAF_address)
text_clean <- function(x, field.type = "text") {

  if(x == "" | is.na(x)) {

    return(NA)

  }

  else if (field.type == "text"){

    x <- gsub(",$","",x)

    return(toupper(x))

  }

  else if (field.type == "num"){

    return(as.numeric(gsub(",","",x)))

  }

}

text_clean <- Vectorize(text_clean) 

labelled_EMIS <- read.csv("N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_labelled_EMIS.csv", stringsAsFactors = F)

EMIS_sites <- read.csv("N:/ABR/Output 1.1 New Trade Waste Customers for 2016-17/7. EMIS/160517_unlabelled_EMIS.csv", stringsAsFactors = F)

indx <- !is.na(labelled_EMIS$NUMBER_FIRST_SUFFIX) & is.numeric(labelled_EMIS$NUMBER_FIRST_SUFFIX)

if(all(indx != FALSE)) {
  labelled_EMIS[indx,]$NUMBER_FIRST <- labelled_EMIS[indx,]$NUMBER_FIRST_SUFFIX

  labelled_EMIS[indx,]$NUMBER_FIRST_SUFFIX <- NA
}

indx <- grepl("[A-Z]$", labelled_EMIS$NUMBER_FIRST)

if(all(indx != FALSE)) {
  numFirst <- gsub("([0-9]+)[A-Z]", "\\1", labelled_EMIS[indx,]$NUMBER_FIRST)  

  suffix <- gsub("([0-9]+)([A-Z])", "\\2", labelled_EMIS[indx,]$NUMBER_FIRST)

  labelled_EMIS[indx,]$NUMBER_FIRST <- numFirst

  labelled_EMIS[indx,]$NUMBER_FIRST_SUFFIX <- suffix
}

indx <- grepl("^[A-Z]{1}\\s.*", labelled_EMIS$STREET_NAME)

if(all(indx != FALSE)) {

  suffix <- gsub("^([A-Z]{1})\\s(.*)", "\\1", labelled_EMIS[indx,]$STREET_NAME)  

  streetname <- gsub("^([A-Z]{1})\\s(.*)", "\\2", labelled_EMIS[indx,]$STREET_NAME)  

  labelled_EMIS[indx,]$NUMBER_FIRST_SUFFIX <- suffix

  labelled_EMIS[indx,]$STREET_NAME <- streetname

}

indx <- grepl("^MOUNT\\s.*", labelled_EMIS$STREET_NAME)

labelled_EMIS$STREET_NAME[indx] <- gsub("^(MOUNT)", "MT", labelled_EMIS[indx,]$STREET_NAME)  

labelled_EMIS <- labelled_EMIS %>%
                 mutate(BUILDING_NAME = unname(text_clean(BUILDING_NAME)),
                        LEVEL_TYPE = unname(text_clean(LEVEL_TYPE)),
                        LEVEL_NUMBER = unname(text_clean(LEVEL_NUMBER, field.type = "num")),
                        FLAT_TYPE = unname(text_clean(FLAT_TYPE)),
                        FLAT_NUMBER_PREFIX = unname(text_clean(FLAT_NUMBER_PREFIX)),
                        FLAT_NUMBER = unname(text_clean(FLAT_NUMBER, field.type = "num")),
                        NUMBER_FIRST = unname(text_clean(NUMBER_FIRST, field.type = "num")),
                        NUMBER_LAST = unname(text_clean(NUMBER_LAST, field.type = "num")),
                        STREET_NAME = unname(text_clean(STREET_NAME)),
                        STREET_TYPE_CODE = unname(text_clean(STREET_TYPE_CODE)),
                        LOCALITY_NAME = unname(text_clean(LOCALITY_NAME)),
                        POSTCODE = unname(text_clean(POSTCODE, field.type = "num")),
                        STATE_ABBREVIATION = unname(text_clean(STATE_ABBREVIATION)),
                        KEY = gsub(" ", "", paste0(#substr(LOCALITY_NAME, 1,3), 
                                     substr(gsub("'","", STREET_NAME), 1,2),
                                     ifelse(NUMBER_FIRST %% 2 == 0, "EVEN", "ODD"))),
                        ID = row_number(),
                        ADDRESS_TYPE = ifelse(is.na(NUMBER_LAST), 1, 2)) #%>%
                        #left_join(select(DHHS_sites, -ADDRESS_STRING), by = "SITE")

#Rcpp::sourceCpp("C:/Users/farrelp1/Documents/AddressR/src/AddressMatchTotalScorev6.cpp")

labelled_EMIS$LATITUDE = rep(as.numeric(NA), length=nrow(labelled_EMIS))

labelled_EMIS$LONGITUDE = rep(as.numeric(NA), length=nrow(labelled_EMIS))

EMIS_GNAF <- Clean3(nonblanks=labelled_EMIS, ID=labelled_EMIS$SITE)
#include <Rcpp.h>

// [[Rcpp::export]]

double diff(int x, int y) {

 // if(x == NA_INTEGER || y == NA_INTEGER) {

 //   return int(0);

 // }

  if(x > y) {

    double ratio = 1 / (double(y)/double(x));
    return ratio;

  }

  else {

    double ratio = 1 / (double(x)/double(y));
    return ratio;

  }

};
#include <Rcpp.h>

// [[Rcpp::export]]

double divide() {
  //Rcpp::Rcout << std::setprecision(10) << 4.50;
  //return 75 / 78;
  //return 0.5556466;
  double x = double(75)/double(78);
  return x;

}
#include <Rcpp.h>

// [[Rcpp::export]]

Rcpp::LogicalVector test(Rcpp::DataFrame x, Rcpp::CharacterVector key) {

  Rcpp::CharacterVector sub = x["KEY"];
  char* level = key[0];
  Rcpp::LogicalVector ind(sub.size());
  for (int i = 0; i < sub.size(); i++){
    ind[i] = (sub[i] == level);
  }
  return(ind);
  //return x["STREET_NAME"][ind];

}
#include <Rcpp.h>

// [[Rcpp::export]]

Rcpp::CharacterVector test2(Rcpp::DataFrame x, Rcpp::CharacterVector key) {

  Rcpp::CharacterVector sub = x["KEY"];
  char* level = key[0];
  Rcpp::LogicalVector ind(sub.size());
  for (int i = 0; i < sub.size(); i++){
    ind[i] = (sub[i] == level);
  }

  Rcpp::CharacterVector STREET_NAME = x["STREET_NAME"];
  Rcpp::CharacterVector LOCALITY_NAME = x["LOCALITY_NAME"];
  Rcpp::NumericVector NUMBER_FIRST = x["NUMBER_FIRST"];
  Rcpp::NumericVector NUMBER_LAST = x["NUMBER_LAST"];
  Rcpp::CharacterVector STREET_TYPE_CODE = x["STREET_TYPE_CODE"];
  Rcpp::NumericVector LONGITUDE = x["LONGITUDE"];
  Rcpp::NumericVector LATITUDE = x["LATITUDE"];
  Rcpp::NumericVector POSTCODE = x["POSTCODE"];

  //return(ind);
  //return Rcpp::DataFrame::create(Rcpp::Named("STREET_NAME")  = STREET_NAME[ind],
                                //Rcpp::Named("LOCALITY_NAME")  = LOCALITY_NAME[ind],
                                //Rcpp::Named("NUMBER_FIRST")  = NUMBER_FIRST[ind],
                                //Rcpp::Named("NUMBER_LAST")  = NUMBER_LAST[ind],
                                //Rcpp::Named("STREET_TYPE_CODE")  = STREET_TYPE_CODE[ind],
                                //Rcpp::Named("LONGITUDE")  = LONGITUDE[ind],
                                //Rcpp::Named("LATITUDE")  = LATITUDE[ind],
                                //Rcpp::Named("POSTCODE")  = POSTCODE[ind]);

    Rcpp::DataFrame y = Rcpp::DataFrame::create(Rcpp::Named("STREET_NAME")  = STREET_NAME[ind],
                                Rcpp::Named("LOCALITY_NAME")  = LOCALITY_NAME[ind],
                                Rcpp::Named("NUMBER_FIRST")  = NUMBER_FIRST[ind],
                                Rcpp::Named("NUMBER_LAST")  = NUMBER_LAST[ind],
                                Rcpp::Named("STREET_TYPE_CODE")  = STREET_TYPE_CODE[ind],
                                Rcpp::Named("LONGITUDE")  = LONGITUDE[ind],
                                Rcpp::Named("LATITUDE")  = LATITUDE[ind],


                                Rcpp::Named("POSTCODE")  = POSTCODE[ind]);

  Rcpp::CharacterVector z = y["STREET_NAME"];  
  int len = z.size();

  if(len == 0) {

  return y["STREET_NAME"];}

  else {return x["STREET_NAME"];}
}
#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
IntegerVector order_(NumericVector x) {
  if (is_true(any(duplicated(x)))) {
    Rf_warning("There are duplicates in 'x'; order not guaranteed to match that of R's base::order");
  }
  NumericVector sorted = clone(x).sort();
  return match(sorted, x);
}
// [[Rcpp::export]]
NumericVector sortIt(NumericVector v){
    int len = v.size();
    std::sort(&v[0], &v[len]);
    return v;
}
// [[Rcpp::export]]
NumericVector in_range(NumericVector x) {

    NumericVector out = NumericVector(10);

    for (int i=0; i<100; i++) {
      out[i] = x[i];
    }
    return out;
} 
#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]

double which_min(Rcpp::NumericVector x) {

  //Rcpp::NumericVector x(xs);
  Rcpp::NumericVector::iterator it =       // iterator type
    std::min_element(x.begin(), x.end());  // STL algo

  double pos = it - x.begin();

  return pos; 

  //return Rcpp::wrap(it - x.begin()); 

}
t <- sample(seq(from = 20, to = 50, by = 5), size = 50, replace = TRUE)
t2 <- order_(t)
t3 <- in_range(sortIt(t))

nrow(test2(GNAF_address, "C"))

t <- sample(seq(from = 20, to = 50, by = 5), size = 50, replace = TRUE)

which_min(t)


phillipf/GNAFAPI documentation built on May 25, 2019, 5:05 a.m.