Introduction and installation

The package fortranClustering delivers a basic example of how to use a fortran subroutine in R. It also comes with an equivalent implementation of the fortran subroutine in R code and a simple example data set.

To build and install the package on a linux system make sure that the devtools package is installed. Then load it with

library(devtools)

Now you can install the fortranClustering package directly from github

install_github("MartinRoth/fortranClustering",ref="master")

and load it

library(fortranClustering)

How to include fortran code in R

Using a R package is a convenient way of adding functionalities to R such as new functions or including code that was not written in R. Documentation and man pages are usually shipped with an R package. However, installing an R package hides pretty much all the steps needed to run fortran code in R. Thus, in this section we will step-by-step explain how to make a fortran subroutine executable within a R session.

Step 1: The fortran subroutine

subroutine clustering(data2d,startID,thres,finID,numIDs,tcl,missval,nx,ny)

  implicit none
  ! input
  real(kind=8), intent(in) :: data2d(nx,ny) ! the input 2D field
  integer, intent(in)      :: nx,ny         ! input dimensions
  integer, intent(in)      :: startID       ! the first ID to use
  real(kind=8), intent(in) :: thres         ! minimum value for clustering  
  real(kind=8), intent(in) :: missval       ! value for missing data
  ! output
  integer, intent(out)     :: finID         ! the last used ID
  integer, intent(out)     :: numIDs        ! the total number of clusters/IDs
  integer, intent(out)     :: tcl(nx,ny)    ! the clustered field holding the IDs
  ! counters etc.
  integer                  :: x,y           ! for spatial dimenstions
  integer                  :: conx,cony     ! for spatial dims in nested loop
  integer                  :: i,tp          ! for misc.
  integer                  :: clID          ! for iterating clusters
  integer, allocatable     :: allIDs(:)     ! helper vector for re-assigning of IDs
  integer                  :: neighb(2)     ! temporary memory for neighbouring grid points
  logical                  :: mask(nx,ny)   ! logical field; true if value > thres

  ! initialize variables and arrays
  tcl=-999
  mask=.false.
  clID=startID
  numIDs=0

  ! mask values higher than threshold and if not missing value
  do y=1,ny
    do x=1,nx
      if(data2d(x,y)>thres .AND. data2d(x,y).ne.missval)mask(x,y)=.true.
    end do
  end do

  ! check if there are any gridpoints to cluster
  if(ANY(mask))then

    ! assign IDs to continous cells
    do y=1,ny
      do x=1,nx
        neighb=-999
        if(mask(x,y))then
          ! gather neighbouring IDs; left,up
          if(x.ne.1)  neighb(1)=tcl((x-1),y)
          if(y.ne.1)  neighb(2)=tcl(x,(y-1))
          ! check if there is NO cluster around the current pixel; create new one
          if(ALL(neighb==-999))then
            tcl(x,y)=clID
            clID=clID+1
            numIDs=numIDs+1
          else
            ! both neighbours are in the same cluster
            if(neighb(1)==neighb(2).and.neighb(1).ne.-999)then
              tcl(x,y)=neighb(1)
            end if
            ! both neighbors are in different clusters but none of them is (-999)
            if(neighb(1).ne.neighb(2) .and. neighb(1).ne.-999 .and. neighb(2).ne.-999)then
              numIDs=numIDs-1
              tcl(x,y)=MINVAL(neighb)
              ! update the existing higher cluster with the lowest neighbour
              do cony=1,ny
                do conx=1,nx
                  if(tcl(conx,cony)==MAXVAL(neighb))tcl(conx,cony)=MINVAL(neighb)
                end do
              end do
            end if
            ! both neighbors are in different clusters but ONE of them is empty(-999)
            if(neighb(1).ne.neighb(2) .and. (neighb(1)==-999 .or. neighb(2)==-999))then
              tcl(x,y)=MAXVAL(neighb)
            end if
          end if
        end if
      end do
    end do

    ! gather IDs and rename to gapless ascending IDs
    if(numIDs>0)then
      allocate(allIDs(numIDs))
      allIDs=-999
      clID=startID-1
      tp=1
      do y=1,ny
        do x=1,nx
          if(.NOT.ANY(allIDs==tcl(x,y)) .AND. tcl(x,y).ne.-999)then
            allIDs(tp)=tcl(x,y)
            tp=tp+1
          end if
        end do
      end do

      do i=1,tp-1
        clID=clID+1
        do y=1,ny
          do x=1,nx
            if(tcl(x,y)==allIDs(i))then
              tcl(x,y)=clID
            end if
          end do
        end do
      end do
      deallocate(allIDs)
    end if

  end if
  ! return final cluster ID
  finID=clID
end subroutine clustering

Step 2: Compile a shared object file

R CMD SHLIB clustering2d.f90

Step 3: Load the shared object into R

dyn.load('clustering2d.so')

Step 4: The .Fortran CALL

retdata <- .Fortran("clustering",
           data2d = as.double(x),
           startID = as.integer(1),
           thres = as.double(thres),
           finID = as.integer(1),
           numIDs = as.integer(1),
           tcl = matrix(rep(as.integer(1),nx*ny),ncol=nx),
           missval=as.double(-999.0),
           nx=as.integer(nx),
           ny=as.integer(ny))

Optional step 5: Writing a wrapper function

clusteringf90 <- function(x,thres=0) {
  # only allow a matrix as input
  if(!is.matrix(x)){
    stop("x is not a matrix!")
  }
  # load the fortran subroutine
  if(!is.loaded('clustering2d')){
    dyn.load('clustering2d.so')
  }
  # set NA to -999
  x[is.na(x)] <- -999.0
  # get dimensions of input matrix
  nx <- dim(x)[1]
  ny <- dim(x)[2]
  # call the fortran subroutine
  retdata <- .Fortran("clustering",
                       data2d = as.double(x),
                       startID = as.integer(1),
                       thres = as.double(thres),
                       finID = as.integer(1),
                       numIDs = as.integer(1),
                       tcl = matrix(rep(as.integer(1),nx*ny),ncol=nx),
                       missval=as.double(-999.0),
                       nx=as.integer(nx),
                       ny=as.integer(ny))
  # set -999 in tcl back to NA
  retdata$tcl[retdata$tcl==-999] <- NA
  # return the data
  return(retdata)
}

An example R session

Load the example data and quickly plot it

data("fCdata")
filled.contour(fCdata)

Run the FORTRAN implementation and plot the results

resultf90 <- clusteringf90(fCdata)
filled.contour(resultf90)

Let's compare performance!

# run the FORTRAN version
system.time({
  resultf90 <- clusteringf90(fCdata)
})

# now the R implementation
system.time({
  resultR <- clusteringR(fCdata)
})

Conclusion

The R build environment and FORTRAN interface provide a comfortable way to include FORTRAN code in R. With a few commands it is possible to compile and load subroutines into the R workspace.



MartinRoth/fortranClustering documentation built on May 7, 2019, 3:38 p.m.