#------------------------------------------------------------ 
# Carl Schmertmann
#
# Created  : 4 Dec 13 
# Last Edit: 5 Dec 13
#
# Use Census API and mapping API services to plot ACS data and census tract 
# boundaries over a Google (or other selected) background map
#
# This is mostly based on the excellent examples in David Kahlhe and
#  Hadley Wickham's R Journal article on ggmap2, which can be found
#  at http://journal.r-project.org/archive/2013-1/kahle-wickham.pdf
#------------------------------------------------------------

rm(list = ls())

## libraries for this task
library(acs)
## Loading required package: stringr Loading required package: plyr Loading
## required package: XML
## 
## Attaching package: 'acs'
## 
## The following object is masked from 'package:base':
## 
## apply
library(ggmap)
## Loading required package: ggplot2
library(maptools)
## Loading required package: sp Checking rgeos availability: TRUE

## housekeeping - start graphics fresh, using Windows
graphics.off()
windows(record = TRUE)

## read shape file, and then unzip to extract the .shp, .dbf, .shx files in
## this case the shape file is the complete set of California 2010 tract
## boundaries

download.file(url = "ftp://ftp2.census.gov/geo/pvs/tiger2010st/06_California/06/tl_2010_06_tract10.zip", 
    destfile = "./tl_2010_06_tract10.zip")

unzip("tl_2010_06_tract10.zip")

## convert to a spatial polygons data frame
CA = readShapePoly("tl_2010_06_tract10.shp")

## construct a logical variable that identifies tracts belonging to five
## counties in the SF bay area: Alameda (FIPS code 001), Contra Costa (013), Marin
## (041), San Francisco (075), San Mateo (081)

sf.area = CA@data$COUNTYFP10 %in% c("001", "013", "041", "075", "081")

## subset the spatial polygons, so that only SF area tracts are included

SFmap = CA[sf.area, ]

## translate the polygons into a standard dataframe, for use with ggmap
SFtracts = fortify(SFmap)
## Regions defined for each Polygons

## use the Google API to create a street map of SF, for use as a background
base <- qmap(location = "San Francisco CA", zoom = 10, source = "google", maptype = "roadmap")
## Map from URL :
## http://maps.googleapis.com/maps/api/staticmap?center=San+Francisco+CA&zoom=10&size=%20640x640&scale=%202&maptype=roadmap&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
## Information from URL :
## http://maps.googleapis.com/maps/api/geocode/json?address=San+Francisco+CA&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms

## show just the background map
base

plot of chunk unnamed-chunk-2


## plot the tract boundaries over the base map
base + geom_polygon(aes(x = long, y = lat, group = group), data = SFtracts, 
    col = "purple", fill = NA, size = 0.5)

plot of chunk unnamed-chunk-2


###############################################################
## Now use the Census API and the acs() package to get 2007-11 data.  In this
## example we get the total number of Asian residents in each tract, which is
## variable B02001_005 (5th column of ACS table B02001).  
##
## Variable names can be found in the MS Excel file at
## http://www2.census.gov/acs2011_5yr/summaryfile/ACS2011_5-Year_TableShells.xls
##
## use geo.make from the acs package to define a 'geo.set' object
## corresponding to every tract in this 5-county area of California

SFgeo = geo.make(state = "CA", county = c("Alameda", "Contra Costa", "Marin", 
    "San Francisco", "San Mateo"), tract = "*")

## use a private API key (available free from
## http://www.census.gov/developers/tos/key_request.html )

api.key.install("your_key_goes_here")

## grab the tract-level estimates of Asian residents from the 2007-2011 ACS
A <- acs.fetch(endyear = 2011, span = 5, geography = SFgeo, variable = "B02001_005")

## peek at the structure and data in acs object 'A'
str(A)
## Formal class 'acs' [package "acs"] with 9 slots
##   ..@ endyear       : int 2011
##   ..@ span          : int 5
##   ..@ geography     :'data.frame':   980 obs. of  4 variables:
##   .. ..$ NAME  : chr [1:980] "Census Tract 4001, Alameda County, California" "Census Tract 4002, Alameda County, California" "Census Tract 4003, Alameda County, California" "Census Tract 4004, Alameda County, California" ...
##   .. ..$ state : int [1:980] 6 6 6 6 6 6 6 6 6 6 ...
##   .. ..$ county: int [1:980] 1 1 1 1 1 1 1 1 1 1 ...
##   .. ..$ tract : chr [1:980] "400100" "400200" "400300" "400400" ...
##   ..@ acs.colnames  : chr "B02001_005"
##   ..@ modified      : logi TRUE
##   ..@ acs.units     : Factor w/ 5 levels "count","dollars",..: 1
##   ..@ currency.year : int 2011
##   ..@ estimate      : num [1:980, 1] 228 232 304 158 186 49 453 327 51 366 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:980] "Census Tract 4001, Alameda County, California" "Census Tract 4002, Alameda County, California" "Census Tract 4003, Alameda County, California" "Census Tract 4004, Alameda County, California" ...
##   .. .. ..$ : chr "B02001_005"
##   ..@ standard.error: num [1:980, 1] 76 67.5 72.3 42.6 65.7 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:980] "Census Tract 4001, Alameda County, California" "Census Tract 4002, Alameda County, California" "Census Tract 4003, Alameda County, California" "Census Tract 4004, Alameda County, California" ...
##   .. .. ..$ : chr "B02001_005"

head(A)
## ACS DATA: 
##  2007 -- 2011 ;
##   Estimates w/90% confidence intervals;
##   for different intervals, see confint()
##                                               B02001_005 
## Census Tract 4001, Alameda County, California 228 +/- 125
## Census Tract 4002, Alameda County, California 232 +/- 111
## Census Tract 4003, Alameda County, California 304 +/- 119
## Census Tract 4004, Alameda County, California 158 +/- 70 
## Census Tract 4005, Alameda County, California 186 +/- 108
## Census Tract 4006, Alameda County, California 49 +/- 54

head(A@estimate)
##                                               B02001_005
## Census Tract 4001, Alameda County, California        228
## Census Tract 4002, Alameda County, California        232
## Census Tract 4003, Alameda County, California        304
## Census Tract 4004, Alameda County, California        158
## Census Tract 4005, Alameda County, California        186
## Census Tract 4006, Alameda County, California         49

## IMPORTANT: the tracts in 'A' are not in the same order as the tract
## polygons in 'SFmap' In order to match properly, we need to construct a
## geographic id code for each ACS area The make.geoid() function constructs
## a string that will match the GEOID variable in the slot called SFmap@data

make.geoid = function(A) {
    colz <- colnames(A@geography)
    state2 <- county3 <- tract6 <- bg1 <- ""
    state2 <- sprintf("%02d", A@geography$state)
    if ("county" %in% colz) {
        county3 <- sprintf("%03d", as.numeric(A@geography$county))
    }
    if ("tract" %in% colz) {
        tract6 <- sprintf("%06d", as.numeric(A@geography$tract))
    }
    if ("blockgroup" %in% colz) {
        bg1 <- sprintf("%01d", as.numeric(A@geography$blockgroup))
    }

    return(paste0(state2, county3, tract6, bg1))
}

## use the make.geoid funtion on the 'A' ACS data object
ACSTractCode = make.geoid(A)

## match indices: o[1], o[2], ... will contain the ACS indices that
## correspond to polygons 1,2,... in the shapefile
o = match(SFmap@data$GEOID, ACSTractCode)

## counts of residents, IN SHAPEFILE ORDER
Acount = A@estimate[o]

## For each individual, choose a random location within the tract of
## residence
Adots = dotsInPolys(SFmap, x = Acount)
## Warning: x coerced to integer

## convert the random locations to a data frame for plotting in ggmap
A.df = data.frame(coordinates(Adots)[, 1:2], race = "Asian")

## peek at the structure of A.df
head(A.df)
##        x     y  race
## 1 -122.6 38.12 Asian
## 2 -122.6 38.12 Asian
## 3 -122.6 38.12 Asian
## 4 -122.6 38.11 Asian
## 5 -122.6 38.12 Asian
## 6 -122.6 38.12 Asian

## replot the base map, and this time overlay the estimated density of Asian
## residents
base + stat_density2d(aes(x = x, y = y), data = A.df, bins = 6, geom = "polygon", 
    fill = "purple", alpha = 0.4)
## Warning: Removed 153534 rows containing non-finite values
## (stat_density2d).

plot of chunk unnamed-chunk-2


## just for fun, put the same data on Asian density over an Open Street Map
## at a higher zoom level
qmap("San Francisco CA", zoom = 12, source = "osm") + stat_density2d(aes(x = x, 
    y = y), data = A.df, bins = 6, geom = "polygon", fill = "purple", alpha = 0.4)
## Map from URL :
## http://maps.googleapis.com/maps/api/staticmap?center=San+Francisco+CA&zoom=12&size=%20640x640&maptype=terrain&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
## Information from URL :
## http://maps.googleapis.com/maps/api/geocode/json?address=San+Francisco+CA&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
## Warning: Removed 695351 rows containing non-finite values
## (stat_density2d).

plot of chunk unnamed-chunk-2