Dependencies
This notebook requires the following packages:
# primary package
library(areal)
# tidyverse packages
library(dplyr)
# spatial packages
library(sf)
library(tidycensus)
library(tigris)
To enable
caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
Attaching package: ‘tigris’
The following object is masked from ‘package:graphics’:
plot
# other packages
library(gridExtra)
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine
library(microbenchmark)
library(testthat)
Comparisons with sf
Produce Estimates
First, we’ll create three spatially extensive estimates for comparison. Two will use the areal
package, varying the type of weight applied to the estimate:
# areal: spatially extensive interpolation of TOTAL_E from ar_stl_race
# (source, keyed by GEOID) onto ar_stl_wards (target, keyed by WARD),
# using weight = "total"
areal_exT <- aw_interpolate(
  ar_stl_wards,
  tid = WARD,
  source = ar_stl_race,
  sid = GEOID,
  weight = "total",
  output = "tibble",
  extensive = "TOTAL_E"
)

# areal: the same interpolation, but using weight = "sum"
areal_exS <- aw_interpolate(
  ar_stl_wards,
  tid = WARD,
  source = ar_stl_race,
  sid = GEOID,
  weight = "sum",
  output = "tibble",
  extensive = "TOTAL_E"
)
Next, we’ll replicate the process using sf:
# sf: area-weighted interpolation of the TOTAL_E column onto the wards,
# requested as spatially extensive (extensive = TRUE)
sf_ex <- st_interpolate_aw(
  ar_stl_race["TOTAL_E"],
  ar_stl_wards,
  extensive = TRUE
)
st_interpolate_aw assumes attributes are constant over areas of x
We’ll also produce a spatially intensive estimate using areal:
# areal: spatially intensive interpolation of ASTHMA from ar_stl_asthma
# (source, keyed by GEOID) onto ar_stl_wards (target, keyed by WARD),
# using weight = "sum"
areal_in <- aw_interpolate(
  ar_stl_wards,
  tid = WARD,
  source = ar_stl_asthma,
  sid = GEOID,
  weight = "sum",
  output = "tibble",
  intensive = "ASTHMA"
)
And finally, we’ll replicate the spatially intensive estimate using sf:
# sf: area-weighted interpolation of the ASTHMA column onto the wards,
# requested as spatially intensive (extensive = FALSE)
sf_in <- st_interpolate_aw(
  ar_stl_asthma["ASTHMA"],
  ar_stl_wards,
  extensive = FALSE
)
st_interpolate_aw assumes attributes are constant over areas of x
Compile Results
First, we’ll compile the extensive results:
# areal, weight = "sum": keep the ward id plus the estimate, and name the
# estimate column after the method so it stays identifiable after joining
areal_exS <- areal_exS %>%
  select(WARD, areal_exS = TOTAL_E)

# areal, weight = "total": same reduction
areal_exT <- areal_exT %>%
  select(WARD, areal_exT = TOTAL_E)

# sf: rename the estimate column, then drop the geometry so that only a
# plain data frame is carried into the joins
sf_ex <- sf_ex %>%
  rename(sf_ex = TOTAL_E) %>%
  st_set_geometry(NULL)

# combine the three sets of estimates on the ward identifier (Group.1 in the
# sf output, WARD in the areal output) and add the difference between the two
# areal weighting options
extensive <- sf_ex %>%
  left_join(areal_exT, by = c("Group.1" = "WARD")) %>%
  left_join(areal_exS, by = c("Group.1" = "WARD")) %>%
  mutate(delta = areal_exT - areal_exS) %>%
  rename(Ward = Group.1) %>%
  as_tibble()
We’ll make a similar compilation of the intensive results:
# areal: keep the ward id plus the estimate, naming the estimate column after
# the method
areal_in <- areal_in %>%
  select(WARD, areal_in = ASTHMA)

# sf: rename the estimate column, then drop the geometry so that only a plain
# data frame is carried into the join
sf_in <- sf_in %>%
  rename(sf_in = ASTHMA) %>%
  st_set_geometry(NULL)

# combine both sets of estimates on the ward identifier (Group.1 in the sf
# output, WARD in the areal output)
intensive <- sf_in %>%
  left_join(areal_in, by = c("Group.1" = "WARD")) %>%
  rename(Ward = Group.1) %>%
  as_tibble()
Print Tables
The following code chunk produces two tables for the manuscript:
# subset to wards 1 through 10, round every estimate to three decimals, and
# give the estimate columns display names for the printed table
extensiveSub <- extensive %>%
  filter(Ward >= 1, Ward <= 10) %>%
  mutate(
    sf_ex = round(sf_ex, 3),
    areal_exT = round(areal_exT, 3),
    areal_exS = round(areal_exS, 3),
    delta = round(delta, 3)
  ) %>%
  rename(
    "sf" = sf_ex,
    "areal, total weight" = areal_exT,
    "areal, sum weight" = areal_exS
  )
# render the extensive comparison table to a PNG file: open the device, draw
# the table with its title, then close the device
png(
  filename = "paper/extensiveTable.png",
  width = 480,
  height = 300,
  bg = "white",
  type = "cairo-png"
)
grid.arrange(
  tableGrob(extensiveSub, rows = NULL),
  top = "Comparison of sf and areal Output\nSpatially Extensive Interpolation"
)
dev.off()
null device
1
# subset to wards 1 through 10, round both estimates to three decimals, and
# give the estimate columns display names for the printed table
intensiveSub <- intensive %>%
  filter(Ward >= 1, Ward <= 10) %>%
  mutate(
    sf_in = round(sf_in, 3),
    areal_in = round(areal_in, 3)
  ) %>%
  rename(
    "sf" = sf_in,
    "areal" = areal_in
  )
# render the intensive comparison table to a PNG file: open the device, draw
# the table with its title, then close the device
png(
  filename = "paper/intensiveTable.png",
  width = 480,
  height = 300,
  bg = "white",
  type = "cairo-png"
)
grid.arrange(
  tableGrob(intensiveSub, rows = NULL),
  top = "Comparison of sf and areal Output\nSpatially Intensive Interpolation"
)
dev.off()
null device
1
Compare Results
We can verify that the areal workflow with weight = "total" matches the sf extensive output:
# signals an error if the sf estimates and the areal weight = "total"
# estimates are not equal
expect_equal(
  object = extensive$sf_ex,
  expected = extensive$areal_exT
)
We can do the same for the intensive interpolations:
# signals an error if the sf and areal intensive estimates are not equal
expect_equal(
  object = intensive$sf_in,
  expected = intensive$areal_in
)
Benchmark
Next, we’ll benchmark the extensive estimation times:
# time the areal and sf spatially extensive interpolations against each other;
# the sf call is wrapped in suppressWarnings() so its warning is not emitted
# on every benchmark iteration
microbenchmark(
  aw_interpolate(
    ar_stl_wards, tid = WARD, source = ar_stl_race, sid = GEOID,
    weight = "total", output = "tibble", extensive = "TOTAL_E"
  ),
  suppressWarnings(
    st_interpolate_aw(ar_stl_race["TOTAL_E"], ar_stl_wards, extensive = TRUE)
  )
)
We’ll repeat the process for the intensive estimations:
# time the areal and sf spatially intensive interpolations against each other;
# the sf call is wrapped in suppressWarnings() so its warning is not emitted
# on every benchmark iteration
microbenchmark(
  aw_interpolate(
    ar_stl_wards, tid = WARD, source = ar_stl_asthma, sid = GEOID,
    weight = "sum", output = "tibble", intensive = "ASTHMA"
  ),
  suppressWarnings(
    st_interpolate_aw(ar_stl_asthma["ASTHMA"], ar_stl_wards, extensive = FALSE)
  )
)
Geometry Collections
Finally, we’ll provide an example of a more computationally intensive estimation process that also triggers the geometry collection workflow, which will add to the estimation time. We need to download several data sets using tigris and tidycensus:
Here are the sample sizes for both data sets:
# number of rows in each of the two data sets passed to the benchmark below
# NOTE(review): moPop and moBlockGroups are not created anywhere in the visible
# part of this notebook — presumably by the tigris / tidycensus download step;
# confirm that step runs before these calls
nrow(moPop)
nrow(moBlockGroups)
Here is the benchmark for the estimates produced with these data:
# time a single areal interpolation from moPop (source) onto moBlockGroups
# (target), both keyed by GEOID
# NOTE(review): "totalPop" is passed as an intensive variable; if it is a
# population count it would normally be treated as extensive — confirm this is
# intended for the benchmark
microbenchmark(
  aw_interpolate(
    moBlockGroups, tid = GEOID, source = moPop, sid = GEOID,
    weight = "sum", output = "tibble", intensive = "totalPop"
  )
)
