Introduction

This document contains replication code for the examples provided in our Journal of Open Source Software manuscript.

Dependencies

This notebook requires the following packages:

# primary package
library(areal)

# tidyverse packages
library(dplyr)

# spatial packages
library(sf)
library(tidycensus)
library(tigris)
To enable 
caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.

Attaching package: ‘tigris’

The following object is masked from ‘package:graphics’:

    plot
# other packages
library(gridExtra)

Attaching package: ‘gridExtra’

The following object is masked from ‘package:dplyr’:

    combine
library(microbenchmark)
library(testthat)

Comparisons with sf

Produce Estimates

First, we’ll create three spatially extensive estimates for comparison. Two will use the areal package, varying the type of weight applied to the estimate:

# areal package, spatially extensive using total
areal_exT <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
               weight = "total", output = "tibble", extensive = "TOTAL_E")

# areal package, spatially extensive using sum
areal_exS <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
                            weight = "sum", output = "tibble", extensive = "TOTAL_E")

Next, we’ll replicate the process using sf:

# sf package, spatially extensive
sf_ex <- st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE)
st_interpolate_aw assumes attributes are constant over areas of x

We’ll also produce a spatially intensive estimate using areal:

# areal package, spatially intensive
areal_in <- aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID,
                            weight = "sum", output = "tibble", intensive = "ASTHMA")

And finally, we’ll replicate the spatially intensive estimate using sf:

# sf package, spatially intensive
sf_in <- st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE)
st_interpolate_aw assumes attributes are constant over areas of x

Compile Results

First, we’ll compile the extensive results:

# areal, extensive sum
areal_exS <- areal_exS %>%
  select(WARD, TOTAL_E) %>%
  rename(areal_exS = TOTAL_E)

# areal, extensive total
areal_exT <- areal_exT %>%
  select(WARD, TOTAL_E) %>%
  rename(areal_exT = TOTAL_E)

# sf, extensive total
sf_ex <- sf_ex %>%
  rename(sf_ex = TOTAL_E)
st_geometry(sf_ex) <- NULL

# combine
extensive <- left_join(sf_ex, areal_exT, by = c("Group.1" = "WARD")) %>%
  left_join(., areal_exS, by = c("Group.1" = "WARD")) %>%
  mutate(delta = areal_exT-areal_exS) %>%
  rename(Ward = Group.1) %>%
  as_tibble()

We’ll make a similar compilation of the intensive results:

# areal, intensive
areal_in <- areal_in %>%
  select(WARD, ASTHMA) %>%
  rename(areal_in = ASTHMA)

# sf, intensive
sf_in <- sf_in %>%
  rename(sf_in = ASTHMA)
st_geometry(sf_in) <- NULL

# combine
intensive <- left_join(sf_in, areal_in, by = c("Group.1" = "WARD")) %>%
  rename(Ward = Group.1) %>%
  as_tibble()

Compare Results

We can verify that the areal workflow with weight = "total" matches the sf extensive output:

expect_equal(extensive$sf_ex, extensive$areal_exT)

We can do the same for the intensive interpolations:

expect_equal(intensive$sf_in, intensive$areal_in)

Benchmark

Next, we’ll benchmark the extensive estimation times:

# compare spatially extensive interpolations
microbenchmark(
  aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
                 weight = "total", output = "tibble", extensive = "TOTAL_E"),
  suppressWarnings(st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE))
)

We’ll repeat the process for the intensive estimations:

# compare spatially intensive interpolations
microbenchmark(
  aw_interpolate(ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID,
                 weight = "sum", output = "tibble", intensive = "ASTHMA"),
  suppressWarnings(st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE))
)

Geometry Collections

Finally, we’ll provide an example of a more computationally intensive estimation process that also triggers the geometry collection workflow, which will add to the estimation time. We need to download several data sets using tigris and tidycensus:

Here are the sample sizes for both data sets:

nrow(moPop)
nrow(moBlockGroups)

Here is the benchmark for the estimates produced with these data:

microbenchmark(
  aw_interpolate(moBlockGroups, tid = GEOID, source = moPop, sid = GEOID,
                 weight = "sum", output = "tibble", intensive = "totalPop")
)
---
title: "Appendix for JOSS Paper"
author: "Christopher Prener, Ph.D."
date: '(`r format(Sys.time(), "%B %d, %Y")`)'
output: 
  github_document: default
  html_notebook: default 
---

## Introduction
This document contains replication code for the examples provided in our *Journal of Open Source Software* manuscript.

## Dependencies
This notebook requires the following packages:

```{r load-packages}
# primary package
library(areal)

# tidyverse packages
library(dplyr)

# spatial packages
library(sf)
library(tidycensus)
library(tigris)

# other packages
library(gridExtra)
library(microbenchmark)
library(testthat)
```

## Comparisons with `sf`
### Produce Estimates
First, we'll create three spatially extensive estimates for comparison. Two will use the `areal` package, varying the type of weight applied to the estimate:

```{r areal-extensive}
# Two areal estimates of TOTAL_E from ar_stl_race onto the wards in
# ar_stl_wards. The calls are identical except for the `weight` argument.

# areal, spatially extensive, weight = "total"
areal_exT <- aw_interpolate(
  ar_stl_wards,
  tid = WARD,
  source = ar_stl_race,
  sid = GEOID,
  weight = "total",
  output = "tibble",
  extensive = "TOTAL_E"
)

# areal, spatially extensive, weight = "sum"
areal_exS <- aw_interpolate(
  ar_stl_wards,
  tid = WARD,
  source = ar_stl_race,
  sid = GEOID,
  weight = "sum",
  output = "tibble",
  extensive = "TOTAL_E"
)
```

Next, we'll replicate the process using `sf`:

```{r sf-extensive}
# sf counterpart of the extensive estimate: interpolate only the TOTAL_E
# column of ar_stl_race onto the ward geometries
sf_ex <- st_interpolate_aw(
  x = ar_stl_race["TOTAL_E"],
  to = ar_stl_wards,
  extensive = TRUE
)
```

We'll also produce a spatially intensive estimate using `areal`:

```{r areal-intensive}
# areal, spatially intensive: interpolate ASTHMA from ar_stl_asthma onto
# the wards using weight = "sum"
areal_in <- aw_interpolate(
  ar_stl_wards,
  tid = WARD,
  source = ar_stl_asthma,
  sid = GEOID,
  weight = "sum",
  output = "tibble",
  intensive = "ASTHMA"
)
```

And finally, we'll replicate the spatially intensive estimate using `sf`:

```{r sf-intensive}
# sf counterpart of the intensive estimate: interpolate only the ASTHMA
# column of ar_stl_asthma onto the ward geometries
sf_in <- st_interpolate_aw(
  x = ar_stl_asthma["ASTHMA"],
  to = ar_stl_wards,
  extensive = FALSE
)
```

### Compile Results
First, we'll compile the extensive results:

```{r compile-extensive}
# Reduce each set of estimates to an id column plus one value column named
# after the object it came from, then join everything into a single table.

# areal, weight = "sum": keep the ward id and the renamed estimate
areal_exS <- select(areal_exS, WARD, areal_exS = TOTAL_E)

# areal, weight = "total": keep the ward id and the renamed estimate
areal_exT <- select(areal_exT, WARD, areal_exT = TOTAL_E)

# sf: rename the estimate and drop the geometry column
sf_ex <- sf_ex %>%
  rename(sf_ex = TOTAL_E) %>%
  st_set_geometry(NULL)

# join on the sf id column (Group.1) and the areal ward id (WARD);
# delta is the difference between the two areal weighting approaches
# NOTE(review): this relies on st_interpolate_aw() returning a `Group.1`
# column whose values line up with WARD - confirm for the installed sf version
extensive <- sf_ex %>%
  left_join(areal_exT, by = c("Group.1" = "WARD")) %>%
  left_join(areal_exS, by = c("Group.1" = "WARD")) %>%
  mutate(delta = areal_exT - areal_exS) %>%
  rename(Ward = Group.1) %>%
  as_tibble()
```

We'll make a similar compilation of the intensive results:

```{r compile-intensive}
# areal: keep the ward id and the estimate, renamed after its source object
areal_in <- select(areal_in, WARD, areal_in = ASTHMA)

# sf: rename the estimate and drop the geometry column
sf_in <- sf_in %>%
  rename(sf_in = ASTHMA) %>%
  st_set_geometry(NULL)

# join the sf id column (Group.1) to the areal ward id (WARD)
# NOTE(review): this relies on st_interpolate_aw() returning a `Group.1`
# column whose values line up with WARD - confirm for the installed sf version
intensive <- sf_in %>%
  left_join(areal_in, by = c("Group.1" = "WARD")) %>%
  rename(Ward = Group.1) %>%
  as_tibble()
```

### Print Tables
The following code chunk produces two tables for the manuscript:

```{r print-tables}
# Each table keeps wards 1 through 10, rounds every estimate to three
# decimal places, and gives the columns display names before being drawn
# to a PNG file under paper/.

# extensive estimates: subset, round, relabel
extensiveSub <- extensive %>%
  filter(Ward >= 1, Ward <= 10) %>%
  mutate(
    sf_ex = round(sf_ex, 3),
    areal_exT = round(areal_exT, 3),
    areal_exS = round(areal_exS, 3),
    delta = round(delta, 3)
  ) %>%
  rename(
    `sf` = sf_ex,
    `areal, total weight` = areal_exT,
    `areal, sum weight` = areal_exS
  )

# extensive table: open the PNG device, draw the table, close the device
png(
  filename = "paper/extensiveTable.png",
  width = 480,
  height = 300,
  bg = "white",
  type = "cairo-png"
)
extensiveSub %>%
  tableGrob(rows = NULL) %>%
  grid.arrange(
    top = "Comparison of sf and areal Output\nSpatially Extensive Interpolation"
  )
dev.off()

# intensive estimates: subset, round, relabel
intensiveSub <- intensive %>%
  filter(Ward >= 1, Ward <= 10) %>%
  mutate(
    sf_in = round(sf_in, 3),
    areal_in = round(areal_in, 3)
  ) %>%
  rename(
    `sf` = sf_in,
    `areal` = areal_in
  )

# intensive table: open the PNG device, draw the table, close the device
png(
  filename = "paper/intensiveTable.png",
  width = 480,
  height = 300,
  bg = "white",
  type = "cairo-png"
)
intensiveSub %>%
  tableGrob(rows = NULL) %>%
  grid.arrange(
    top = "Comparison of sf and areal Output\nSpatially Intensive Interpolation"
  )
dev.off()
```

### Compare Results
We can verify that the `areal` workflow with `weight = "total"` matches the `sf` extensive output:

```{r verify-extensive}
# errors if the sf estimates and the areal weight = "total" estimates differ
# beyond expect_equal()'s numeric tolerance
expect_equal(
  object = extensive$sf_ex,
  expected = extensive$areal_exT
)
```

We can do the same for the intensive interpolations:

```{r verify-intensive}
# errors if the sf and areal intensive estimates differ beyond
# expect_equal()'s numeric tolerance
expect_equal(
  object = intensive$sf_in,
  expected = intensive$areal_in
)
```

### Benchmark
Next, we'll benchmark the extensive estimation times:

```{r extensive-benchmark}
# Time the areal and sf spatially extensive interpolations against each other.
# The sf call is wrapped in suppressWarnings() so that any warning it raises
# is not repeated on every benchmark iteration.
microbenchmark(
  aw_interpolate(
    ar_stl_wards,
    tid = WARD,
    source = ar_stl_race,
    sid = GEOID,
    weight = "total",
    output = "tibble",
    extensive = "TOTAL_E"
  ),
  suppressWarnings(
    st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE)
  )
)
```

We'll repeat the process for the intensive estimations:

```{r intensive-benchmark}
# Time the areal and sf spatially intensive interpolations against each other.
# The sf call is wrapped in suppressWarnings() so that any warning it raises
# is not repeated on every benchmark iteration.
microbenchmark(
  aw_interpolate(
    ar_stl_wards,
    tid = WARD,
    source = ar_stl_asthma,
    sid = GEOID,
    weight = "sum",
    output = "tibble",
    intensive = "ASTHMA"
  ),
  suppressWarnings(
    st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE)
  )
)
```

## Geometry Collections
Finally, we'll provide an example of a more computationally intensive estimation process that also triggers the geometry collection workflow, which will add to the estimation time. We need to download several data sets using `tigris` and `tidycensus`:

```{r download-census, include=FALSE}
# County populations: ACS variable B01003_001 for state FIPS 29, downloaded
# with geometry, reprojected to EPSG 26915, and reduced to the GEOID plus the
# estimate column (renamed totalPop)
# NOTE(review): no `year` is supplied, so the vintage follows the installed
# tidycensus default - confirm whether it should be pinned for replication
moPop <- get_acs(
  geography = "county",
  variables = "B01003_001",
  output = "wide",
  state = 29,
  geometry = TRUE
) %>%
  st_transform(crs = 26915) %>%
  select(GEOID, B01003_001E) %>%
  rename(totalPop = B01003_001E)

# Block group geometry for the same state, reprojected to the same CRS and
# reduced to the GEOID column
moBlockGroups <- block_groups(
  state = 29,
  class = "sf"
) %>%
  st_transform(crs = 26915) %>%
  select(GEOID)
```

Here are the sample sizes for both data sets:

```{r census-n}
# number of source features (counties)
nrow(x = moPop)

# number of target features (block groups)
nrow(x = moBlockGroups)
```

Here is the benchmark for the estimates produced with these data:

```{r benchmark-geo-collection}
# Time the interpolation of county totalPop onto the block groups.
# NOTE(review): totalPop is a population count but is passed via `intensive`
# here - confirm this is deliberate for the benchmark
microbenchmark(
  aw_interpolate(
    moBlockGroups,
    tid = GEOID,
    source = moPop,
    sid = GEOID,
    weight = "sum",
    output = "tibble",
    intensive = "totalPop"
  )
)
```
