Last updated: 2019-07-24

Purpose

Aggregate data on county-level pesticide use (from USGS) into county summary measures (kg applied, bee toxic load) and join the years together.

Data sources

Pesticide data is from the USGS National Pesticide Synthesis Project. Toxicity data were generated as described in the code here, here and here. Data on county acreage are from the US Agricultural Census, as described here. Farm resource regions are from USDA-ERS, as described here and linked here.

Libraries and functions

library(tidyverse)
library(readxl)

# function to standardize compound names to uppercase with no special characters
cmpd_std <- function(x){
  toupper(gsub(",|-| ","",x))
}

# function to standardize fips codes & compound names in USGS dataset
# includes fixing two names with slight inconsistencies
std_fips_cmpd <- function(x, cty_var, st_var){
  x$cmpd_usgs <- cmpd_std(x$COMPOUND)
  x$cmpd_usgs <- recode(x$cmpd_usgs, 
                              AMECTOCTRADIN="AMETOCTRADIN",
                              BACILLUSAMYLOLIQUIFACIEN="BACILLUSAMYLOLIQUEFACIEN")
  x[[cty_var]] <- as.character(x[[cty_var]])
  x$fips.length<-nchar(x[[cty_var]])
  x$fips.add[x$fips.length==1]<-"00"
  x$fips.add[x$fips.length==2]<-"0"
  x$fips.add[x$fips.length==3]<-""
  x[[st_var]] <- as.character(x[[st_var]])
  x$fips.length2<-nchar(x[[st_var]])
  x$fips.add2[x$fips.length2==1]<-"0"
  x$fips.add2[x$fips.length2==2]<-""
  x$fips<-paste(x$fips.add2,x[[st_var]],x$fips.add,x[[cty_var]], sep="")
  x <- dplyr::select(x, -fips.add, -fips.add2, -fips.length, -fips.length2, -COMPOUND)
}

## Function to restore lost zeroes to fips codes when state & county already fused
std_fips2 <- function(x, fips_var){
  x[[fips_var]] <- as.character(x[[fips_var]])
  x$fips.length<-nchar(x[[fips_var]])
  x$fips.add[x$fips.length==4]<-"0"
  x$fips.add[x$fips.length==5]<-""
  x$fips<-paste(x$fips.add,x[[fips_var]], sep="")
  x <- dplyr::select(x, -fips.add, -fips.length)
}


# function to change USGS fips code to updated values
# https://www.census.gov/geo/reference/county-changes.html
# 12025 was changed to 12086 in 1997: 
# 46113 was changed to 46102 in 2015
up_fips <- function(x){
  x$fips[x$fips == "46113"] <- "46102"
  x$fips[x$fips == "12025"] <- "12086"
  return(x)
}

# function to store unique compound names from USGS datasets
cmpd_names <- function(x){
  df <- as.data.frame(x)
  cmpd_usgs <- unique(df$cmpd_usgs)
  year <- unique(df$YEAR)
  y <- as.data.frame(cmpd_usgs) %>%
    mutate(yr = year)
  return(y)
}

# function to calculate toxicity-adjusted pesticide values
tox_val <- function(x){
  x$ct_tox_bil_low <- x$EPEST_LOW_KG/x$ld50_ct_ugbee
  x$ct_tox_bil_high <- x$EPEST_HIGH_KG/x$ld50_ct_ugbee
  x$or_tox_bil_low <- x$EPEST_LOW_KG/x$ld50_or_ugbee
  x$or_tox_bil_high <- x$EPEST_HIGH_KG/x$ld50_or_ugbee
  x
}

# function to aggregate toxicity values by county and category
tox_agg <- function(x){
  y <- x %>%
  group_by(YEAR, fips, cat) %>%
  summarise(kg_low = sum(EPEST_LOW_KG, na.rm=TRUE),
            kg_high = sum(EPEST_HIGH_KG, na.rm=TRUE),
            ct_tox_bil_low = sum(ct_tox_bil_low, na.rm=TRUE),
            ct_tox_bil_high = sum(ct_tox_bil_high, na.rm=TRUE),
            or_tox_bil_low = sum(or_tox_bil_low, na.rm=TRUE),
            or_tox_bil_high = sum(or_tox_bil_high, na.rm=TRUE),
            cmpd_n_low = sum(!is.na(EPEST_LOW_KG)),
            cmpd_n_high = sum(!is.na(EPEST_HIGH_KG)))
  y
}

# This function interpolates pesticide values for 1992-2014, for those combinations that have at least two years of data, and returns a new dataframe with the original and interpolated values combined and a column to indicate if the value was interpolated
interp <- function(x){
  if(length(x$value) > 1){
    tmp <- as.data.frame(approx(x$YEAR, x$value, 
                              method = "linear", 
                              xout = seq(1992,2014,by=1), 
                              rule=2))
    tmp$fips <- unique(x$fips)
    tmp$cat <- unique(x$cat)
    tmp$var <- unique(x$var)
  x <- full_join(x, tmp, by = c("YEAR" = "x", "fips", "cat", "var")) %>%
    rename(value_new = y) %>%
    arrange(YEAR) %>%
    mutate(interp = ifelse(is.na(value), "yes", "no")) %>%
    select(-value)
  return(x)
  }
  else{
  }
}

Load data

Pesticide data

## Load pesticide data
file_names <- list.files(path = "../data_big/usgs_pesticides/county", pattern = ".txt")
setwd("../data_big/usgs_pesticides/county")

## Read in data as list and standardize fips codes
pest_cty <- lapply(file_names, read.delim)
pest_cty_std <- lapply(pest_cty, std_fips_cmpd, 
                       cty_var = "COUNTY_FIPS_CODE", 
                       st_var = "STATE_FIPS_CODE") 
pest_cty_std <- lapply(pest_cty_std, up_fips)

str(pest_cty_std)
List of 25
 $ :'data.frame':   301806 obs. of  7 variables:
  ..$ YEAR            : int [1:301806] 1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 ...
  ..$ STATE_FIPS_CODE : chr [1:301806] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:301806] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:301806] 233 2895 1115 404 1276 ...
  ..$ EPEST_HIGH_KG   : num [1:301806] 914 3821 1836 882 1568 ...
  ..$ cmpd_usgs       : chr [1:301806] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:301806] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   296142 obs. of  7 variables:
  ..$ YEAR            : int [1:296142] 1993 1993 1993 1993 1993 1993 1993 1993 1993 1993 ...
  ..$ STATE_FIPS_CODE : chr [1:296142] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:296142] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:296142] 232 10464 1983 106 812 ...
  ..$ EPEST_HIGH_KG   : num [1:296142] 642 11316 2016 477 1147 ...
  ..$ cmpd_usgs       : chr [1:296142] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:296142] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   307830 obs. of  7 variables:
  ..$ YEAR            : int [1:307830] 1994 1994 1994 1994 1994 1994 1994 1994 1994 1994 ...
  ..$ STATE_FIPS_CODE : chr [1:307830] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:307830] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:307830] 645 2943 897 174 1617 ...
  ..$ EPEST_HIGH_KG   : num [1:307830] 916 3716 1196 341 1750 ...
  ..$ cmpd_usgs       : chr [1:307830] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:307830] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   323230 obs. of  7 variables:
  ..$ YEAR            : int [1:323230] 1995 1995 1995 1995 1995 1995 1995 1995 1995 1995 ...
  ..$ STATE_FIPS_CODE : chr [1:323230] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:323230] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:323230] 170 2060 970 199 4352 ...
  ..$ EPEST_HIGH_KG   : num [1:323230] 615 2697 1197 213 4508 ...
  ..$ cmpd_usgs       : chr [1:323230] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:323230] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   334467 obs. of  7 variables:
  ..$ YEAR            : int [1:334467] 1996 1996 1996 1996 1996 1996 1996 1996 1996 1996 ...
  ..$ STATE_FIPS_CODE : chr [1:334467] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:334467] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:334467] 298 588.2 993.5 37.5 3802.4 ...
  ..$ EPEST_HIGH_KG   : num [1:334467] 1675 2010 1936 256 4017 ...
  ..$ cmpd_usgs       : chr [1:334467] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:334467] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   340426 obs. of  7 variables:
  ..$ YEAR            : int [1:340426] 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 ...
  ..$ STATE_FIPS_CODE : chr [1:340426] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:340426] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:340426] 365 1984 890 608 829 ...
  ..$ EPEST_HIGH_KG   : num [1:340426] 1481 3691 2260 634 1296 ...
  ..$ cmpd_usgs       : chr [1:340426] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:340426] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   323239 obs. of  7 variables:
  ..$ YEAR            : int [1:323239] 1998 1998 1998 1998 1998 1998 1998 1998 1998 1998 ...
  ..$ STATE_FIPS_CODE : chr [1:323239] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:323239] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:323239] 698 689 5282 168 7058 ...
  ..$ EPEST_HIGH_KG   : num [1:323239] 835 1806 5765 168 7164 ...
  ..$ cmpd_usgs       : chr [1:323239] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:323239] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   327794 obs. of  7 variables:
  ..$ YEAR            : int [1:327794] 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 ...
  ..$ STATE_FIPS_CODE : chr [1:327794] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:327794] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:327794] 2167 1745 387 196 4069 ...
  ..$ EPEST_HIGH_KG   : num [1:327794] 2297 2299 417 206 4224 ...
  ..$ cmpd_usgs       : chr [1:327794] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:327794] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   334102 obs. of  7 variables:
  ..$ YEAR            : int [1:334102] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
  ..$ STATE_FIPS_CODE : chr [1:334102] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:334102] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:334102] 1159.9 10183.1 1233.4 70.3 5664.9 ...
  ..$ EPEST_HIGH_KG   : num [1:334102] 2199.1 10681.3 1635.8 70.5 5705.7 ...
  ..$ cmpd_usgs       : chr [1:334102] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:334102] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   332702 obs. of  7 variables:
  ..$ YEAR            : int [1:332702] 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
  ..$ STATE_FIPS_CODE : chr [1:332702] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:332702] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:332702] 1298 3657 1471 458 7145 ...
  ..$ EPEST_HIGH_KG   : num [1:332702] 1842 3742 1623 458 7290 ...
  ..$ cmpd_usgs       : chr [1:332702] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:332702] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   337607 obs. of  7 variables:
  ..$ YEAR            : int [1:337607] 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 ...
  ..$ STATE_FIPS_CODE : chr [1:337607] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:337607] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:337607] 32.5 619.6 610.2 507 7052.4 ...
  ..$ EPEST_HIGH_KG   : num [1:337607] 370 1901 671 509 7142 ...
  ..$ cmpd_usgs       : chr [1:337607] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:337607] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   336615 obs. of  7 variables:
  ..$ YEAR            : int [1:336615] 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 ...
  ..$ STATE_FIPS_CODE : chr [1:336615] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:336615] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:336615] 1110 3878 1278 508 7726 ...
  ..$ EPEST_HIGH_KG   : num [1:336615] 1786 5071 2262 508 8009 ...
  ..$ cmpd_usgs       : chr [1:336615] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:336615] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   350874 obs. of  7 variables:
  ..$ YEAR            : int [1:350874] 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
  ..$ STATE_FIPS_CODE : chr [1:350874] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:350874] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:350874] 2421 499 1652 1883 8493 ...
  ..$ EPEST_HIGH_KG   : num [1:350874] 2552 2729 3019 1891 8633 ...
  ..$ cmpd_usgs       : chr [1:350874] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:350874] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   338288 obs. of  7 variables:
  ..$ YEAR            : int [1:338288] 2005 2005 2005 2005 2005 2005 2005 2005 2005 2005 ...
  ..$ STATE_FIPS_CODE : chr [1:338288] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:338288] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:338288] 1258 753 2713 1954 6725 ...
  ..$ EPEST_HIGH_KG   : num [1:338288] 1359 2378 2895 1960 7046 ...
  ..$ cmpd_usgs       : chr [1:338288] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:338288] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   346582 obs. of  7 variables:
  ..$ YEAR            : int [1:346582] 2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...
  ..$ STATE_FIPS_CODE : chr [1:346582] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:346582] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:346582] 2039 1988 1849 439 6141 ...
  ..$ EPEST_HIGH_KG   : num [1:346582] 2125 3068 1964 446 6530 ...
  ..$ cmpd_usgs       : chr [1:346582] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:346582] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   334841 obs. of  7 variables:
  ..$ YEAR            : int [1:334841] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
  ..$ STATE_FIPS_CODE : chr [1:334841] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:334841] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:334841] 1134 3053 4046 720 13178 ...
  ..$ EPEST_HIGH_KG   : num [1:334841] 1210 3374 4263 732 13272 ...
  ..$ cmpd_usgs       : chr [1:334841] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:334841] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   350694 obs. of  7 variables:
  ..$ YEAR            : int [1:350694] 2008 2008 2008 2008 2008 2008 2008 2008 2008 2008 ...
  ..$ STATE_FIPS_CODE : chr [1:350694] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:350694] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:350694] 1119 1891.4 4801.8 91.7 7336.4 ...
  ..$ EPEST_HIGH_KG   : num [1:350694] 1354 2944 4869 119 7507 ...
  ..$ cmpd_usgs       : chr [1:350694] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:350694] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   370402 obs. of  7 variables:
  ..$ YEAR            : int [1:370402] 2009 2009 2009 2009 2009 2009 2009 2009 2009 2009 ...
  ..$ STATE_FIPS_CODE : chr [1:370402] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:370402] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:370402] 597.5 3351 8158.1 53.8 12369.3 ...
  ..$ EPEST_HIGH_KG   : num [1:370402] 986.4 4858.5 8219.1 98.8 12646.9 ...
  ..$ cmpd_usgs       : chr [1:370402] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:370402] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   372993 obs. of  7 variables:
  ..$ YEAR            : int [1:372993] 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
  ..$ STATE_FIPS_CODE : chr [1:372993] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:372993] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:372993] 2482.4 4432.5 1546.7 21.3 5400.4 ...
  ..$ EPEST_HIGH_KG   : num [1:372993] 2490.3 5190.6 1616.3 30.5 5486.2 ...
  ..$ cmpd_usgs       : chr [1:372993] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:372993] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   370198 obs. of  7 variables:
  ..$ YEAR            : int [1:370198] 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
  ..$ STATE_FIPS_CODE : chr [1:370198] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:370198] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:370198] 1561 5733 1460 1352 5404 ...
  ..$ EPEST_HIGH_KG   : num [1:370198] 1596 6290 1849 1378 5522 ...
  ..$ cmpd_usgs       : chr [1:370198] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:370198] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   369394 obs. of  7 variables:
  ..$ YEAR            : int [1:369394] 2012 2012 2012 2012 2012 2012 2012 2012 2012 2012 ...
  ..$ STATE_FIPS_CODE : chr [1:369394] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:369394] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:369394] 2863.3 2854.3 556.9 5.4 9201.2 ...
  ..$ EPEST_HIGH_KG   : num [1:369394] 2874.9 4116.9 749.4 12.3 9455.7 ...
  ..$ cmpd_usgs       : chr [1:369394] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:369394] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   376107 obs. of  7 variables:
  ..$ YEAR            : int [1:376107] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
  ..$ STATE_FIPS_CODE : chr [1:376107] "41" "41" "53" "53" ...
  ..$ COUNTY_FIPS_CODE: chr [1:376107] "27" "59" "1" "5" ...
  ..$ EPEST_LOW_KG    : num [1:376107] NA NA 8.1 2.7 1.1 14 0.1 16.3 63.6 0.1 ...
  ..$ EPEST_HIGH_KG   : num [1:376107] 0.1 0.5 8.1 2.7 1.1 14 0.1 16.3 63.6 0.1 ...
  ..$ cmpd_usgs       : chr [1:376107] "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" ...
  ..$ fips            : chr [1:376107] "41027" "41059" "53001" "53005" ...
 $ :'data.frame':   392433 obs. of  7 variables:
  ..$ YEAR            : int [1:392433] 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
  ..$ STATE_FIPS_CODE : chr [1:392433] "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr [1:392433] "1" "3" "5" "7" ...
  ..$ EPEST_LOW_KG    : num [1:392433] 1699 7514 2614 1259 7590 ...
  ..$ EPEST_HIGH_KG   : num [1:392433] 1886 8472 2889 1278 7756 ...
  ..$ cmpd_usgs       : chr [1:392433] "24D" "24D" "24D" "24D" ...
  ..$ fips            : chr [1:392433] "01001" "01003" "01005" "01007" ...
 $ :'data.frame':   377011 obs. of  7 variables:
  ..$ YEAR            : int [1:377011] 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
  ..$ STATE_FIPS_CODE : chr [1:377011] "41" "41" "41" "41" ...
  ..$ COUNTY_FIPS_CODE: chr [1:377011] "1" "21" "27" "49" ...
  ..$ EPEST_LOW_KG    : num [1:377011] NA NA NA NA NA NA NA NA NA NA ...
  ..$ EPEST_HIGH_KG   : num [1:377011] 0 0.1 1.7 0.1 0 5.7 0 0 0.1 10.6 ...
  ..$ cmpd_usgs       : chr [1:377011] "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" ...
  ..$ fips            : chr [1:377011] "41001" "41021" "41027" "41049" ...
 $ :'data.frame':   365547 obs. of  7 variables:
  ..$ YEAR            : int [1:365547] 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
  ..$ STATE_FIPS_CODE : chr [1:365547] "18" "18" "18" "18" ...
  ..$ COUNTY_FIPS_CODE: chr [1:365547] "15" "17" "39" "49" ...
  ..$ EPEST_LOW_KG    : num [1:365547] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ EPEST_HIGH_KG   : num [1:365547] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ cmpd_usgs       : chr [1:365547] "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" ...
  ..$ fips            : chr [1:365547] "18015" "18017" "18039" "18049" ...

Toxicity data

## Load key with abbreviated insecticide classes
ins_key <- read.csv("../keys/insecticide_class_key.csv")

## Load LD50 values 
ld50 <- read.csv("../output/ld50_usgs_complete_20190625.csv")
str(ld50)
'data.frame':   276 obs. of  11 variables:
 $ cmpd_usgs  : Factor w/ 138 levels "ABAMECTIN","ACEPHATE",..: 1 2 4 5 6 9 11 12 13 15 ...
 $ exp_grp    : Factor w/ 2 levels "contact","oral": 1 1 1 1 1 1 1 1 1 1 ...
 $ ld50_op    : Factor w/ 2 levels "=",">": 1 1 1 1 1 1 1 1 1 1 ...
 $ ld50_n     : int  2 1 2 1 1 1 1 1 1 2 ...
 $ cat        : Factor w/ 1 level "I": 1 1 1 1 1 1 1 1 1 1 ...
 $ cat_grp    : Factor w/ 36 levels "I-10A","I-10B",..: 29 13 26 12 24 13 12 15 24 12 ...
 $ class      : Factor w/ 24 levels "anthranilic diamide",..: 2 16 14 5 19 16 5 24 19 5 ...
 $ grp_source : Factor w/ 1 level "IRAC": 1 1 1 1 1 1 1 1 1 1 ...
 $ cat_exp_grp: Factor w/ 72 levels "I-10A contact",..: 57 25 51 23 47 25 23 29 47 23 ...
 $ ld50_ugbee : num  0.03 1.2 8.095 0.285 0.033 ...
 $ source     : Factor w/ 8 levels "cat_med","ecotox",..: 5 4 5 4 6 4 4 4 4 5 ...
# Select LD50 columns of interest and join abbreviated insecticide classes
ld50_slim <- ld50 %>%
  select(cmpd_usgs, exp_grp, ld50_op, ld50_ugbee, source, cat, cat_grp, class, grp_source) %>%
  left_join(ins_key, by="class")

# Split LD50 data into contact and oral
ld50_or <- filter(ld50_slim, exp_grp=="oral") %>%
  rename(ld50_or_ugbee = ld50_ugbee)

ld50_ct <- filter(ld50_slim, exp_grp=="contact") %>%
  rename(ld50_ct_ugbee = ld50_ugbee)

ld50_key <- ld50_ct %>%
  left_join(ld50_or, by=c("cmpd_usgs","cat","class_short", "source")) %>%
  select(cmpd_usgs, ld50_ct_ugbee, ld50_or_ugbee, cat, class_short, source)

Check match of compound names

cat_key <- read_excel("../keys/USGS_Pesticide-Category.xlsx", 1) %>%
  rename(cmpd_usgs = compound) %>%
  select(cmpd_usgs, category) %>%
  mutate(data = "ld50 data")

# generate list of compounds in pesticide data, 1997-2012
cmpds <- lapply(pest_cty_std, FUN = cmpd_names) %>%
  do.call(rbind, .) %>%
  filter(yr < 2013, yr > 1996) %>%
  select(cmpd_usgs) %>%
  distinct(cmpd_usgs) %>%
  mutate(source = "pest data")

check <- cmpds %>%
  full_join(cat_key, by = 'cmpd_usgs')
Warning: Column `cmpd_usgs` joining factor and character vector, coercing
into character vector
filter(check, (is.na(check$source) & category=="I"))
          cmpd_usgs source category      data
1 ALPHACYPERMETHRIN   <NA>        I ld50 data
2     CHLORDIMEFORM   <NA>        I ld50 data
3              DNOC   <NA>        I ld50 data
4       FLUVALINATE   <NA>        I ld50 data
5          ISAZOFOS   <NA>        I ld50 data
6        ISOFENPHOS   <NA>        I ld50 data
7          TEMEPHOS   <NA>        I ld50 data
# these are all true zeroes in the pesticide data - compounds not used between 1997-2012 according to the USGS pesticide data website: https://water.usgs.gov/nawqa/pnsp/usage/maps/compound_listing.php

Join use and toxicity data

# Join toxicity data to each dataframe and calculate toxicity units
pest_cty_tox <- lapply(pest_cty_std, left_join, ld50_key, by="cmpd_usgs") %>%
  lapply(tox_val)

Aggregate insecticide use by county

pest_cty_agg <- lapply(pest_cty_tox, tox_agg)

# Change to long form - paste dataframes together, and filter only insecticides until 2014
pest_cty_long <- do.call("rbind", pest_cty_agg) %>%
  ungroup() %>%
  filter(cat=="I" & YEAR < 2015) %>%
  gather("var", "value", kg_low:cmpd_n_high)
str(pest_cty_long)
Classes 'tbl_df', 'tbl' and 'data.frame':   558688 obs. of  5 variables:
 $ YEAR : int  1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 ...
 $ fips : chr  "01001" "01003" "01005" "01007" ...
 $ cat  : Factor w/ 1 level "I": 1 1 1 1 1 1 1 1 1 1 ...
 $ var  : chr  "kg_low" "kg_low" "kg_low" "kg_low" ...
 $ value: num  5475 31626 16000 367 10587 ...

Fill missing pesticide data

# Split data into separate dataframes by cty fips and variable
var_fips <- as.factor(paste(pest_cty_long$var,pest_cty_long$fips))
pest_split_cty <- split(as.data.frame(pest_cty_long), f=var_fips)

# Apply the interpolation function to each element on the list
# Takes several minutes to run
pest_split_cty_new <- lapply(pest_split_cty, interp)

# Reassemble the dataframe 
pest_cty_long_fill <- do.call(rbind, pest_split_cty_new) 
str(pest_cty_long_fill)
'data.frame':   563776 obs. of  6 variables:
 $ YEAR     : num  1992 1993 1994 1995 1996 ...
 $ fips     : chr  "01001" "01001" "01001" "01001" ...
 $ cat      : Factor w/ 1 level "I": 1 1 1 1 1 1 1 1 1 1 ...
 $ var      : chr  "cmpd_n_high" "cmpd_n_high" "cmpd_n_high" "cmpd_n_high" ...
 $ value_new: num  39 41 41 41 41 43 40 34 35 34 ...
 $ interp   : chr  "no" "no" "no" "no" ...
# Create key for interpolation
interp <- pest_cty_long_fill %>%
  filter(var == "kg_low") %>%
  select(YEAR, fips, interp)

# Spread data back to wide form
pest_cty_fill <- pest_cty_long_fill %>%
  select(-interp) %>%
  spread(key = var, value = value_new) %>%
  left_join(interp, by = c("YEAR", "fips")) %>%
  rename(interp_pest = interp) %>%
  arrange(fips, YEAR) %>%
  filter(YEAR < 2015)

Census data

# Load county-level acreage data 
census <- read.csv("../output_big/nass_survey/census_agland_cleaned.csv",
                   colClasses=c("fips"="character")) %>%
  select(-region_name, -region_code)
str(census)
'data.frame':   12277 obs. of  10 variables:
 $ YEAR             : int  1997 2002 2007 2012 1997 2002 2007 2012 1997 2002 ...
 $ fips             : chr  "01001" "01001" "01001" "01001" ...
 $ crop_ha          : num  19859 20900 17138 16711 51226 ...
 $ crop_ha_interp   : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ past_ha          : num  6253 6645 9619 9158 3654 ...
 $ past_ha_interp   : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ trt_ins_ha       : num  2891 4731 3416 5568 27759 ...
 $ trt_ins_ha_interp: Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
 $ acres            : int  380440 380440 380440 380440 1017498 1017498 1017498 1017498 566321 566321 ...
 $ ha               : num  153959 153959 153959 153959 411767 ...

Region data

reg_key <- read.csv("../keys/usda_farm_regions_cty.csv") %>%
  std_fips2(fips_var = "fips")

# fill in a couple missing values
# re: 46102, see https://www.census.gov/geo/reference/county-changes.html (tab 2010)
# re: 08014, see https://www.census.gov/geo/reference/county-changes.html (tab 2000)
reg_key <- rbind(reg_key, c("46102", 3, "Northern Great Plains"))
reg_key <- rbind(reg_key, c("08014", 3, "Northern Great Plains"))

str(reg_key)
'data.frame':   3114 obs. of  3 variables:
 $ fips       : chr  "01001" "01003" "01005" "01007" ...
 $ region_code: chr  "6" "6" "6" "5" ...
 $ region_name: Factor w/ 9 levels "Basin and Range",..: 9 9 9 2 2 9 9 2 9 2 ...

Join pesticide data and census data

# dataset with interpolated pesticide values
pest_cty_final <- pest_cty_fill %>%
  full_join(census, by = c("YEAR", "fips")) %>%
  left_join(reg_key, by = "fips") %>%
  select(YEAR, region_name, region_code,fips, cat,
         kg_low, kg_high, ct_tox_bil_low, ct_tox_bil_high,
         or_tox_bil_low, or_tox_bil_high, cmpd_n_low, cmpd_n_high, interp_pest, ha,
         crop_ha, past_ha, trt_ins_ha, crop_ha_interp, past_ha_interp, trt_ins_ha_interp) 
str(pest_cty_final)
'data.frame':   70493 obs. of  21 variables:
 $ YEAR             : num  1992 1993 1994 1995 1996 ...
 $ region_name      : Factor w/ 9 levels "Basin and Range",..: 9 9 9 9 9 9 9 9 9 9 ...
 $ region_code      : chr  "6" "6" "6" "6" ...
 $ fips             : chr  "01001" "01001" "01001" "01001" ...
 $ cat              : Factor w/ 1 level "I": 1 1 1 1 1 1 1 1 1 1 ...
 $ kg_low           : num  5475 5372 13268 8423 4634 ...
 $ kg_high          : num  8580 12101 49208 17687 14077 ...
 $ ct_tox_bil_low   : num  35803 30494 49646 27243 11969 ...
 $ ct_tox_bil_high  : num  51572 61053 154767 75692 56429 ...
 $ or_tox_bil_low   : num  2629 5874 46209 18584 4193 ...
 $ or_tox_bil_high  : num  6873 16622 111091 33616 28348 ...
 $ cmpd_n_low       : num  29 24 25 17 25 21 20 17 22 17 ...
 $ cmpd_n_high      : num  39 41 41 41 41 43 40 34 35 34 ...
 $ interp_pest      : chr  "no" "no" "no" "no" ...
 $ ha               : num  NA NA NA NA NA ...
 $ crop_ha          : num  NA NA NA NA NA ...
 $ past_ha          : num  NA NA NA NA NA ...
 $ trt_ins_ha       : num  NA NA NA NA NA ...
 $ crop_ha_interp   : Factor w/ 2 levels "no","yes": NA NA NA NA NA 1 NA NA NA NA ...
 $ past_ha_interp   : Factor w/ 2 levels "no","yes": NA NA NA NA NA 1 NA NA NA NA ...
 $ trt_ins_ha_interp: Factor w/ 2 levels "no","yes": NA NA NA NA NA 1 NA NA NA NA ...
# dataset without interpolation, but with class data intact
pest_cty_detail <- do.call("rbind", pest_cty_tox) 

pest_cty_final_class <- pest_cty_detail %>%
  filter(cat=="I" & YEAR < 2014) %>%
  group_by(YEAR, STATE_FIPS_CODE, COUNTY_FIPS_CODE, fips, cat, class_short) %>%
  summarise(kg_low = sum(EPEST_LOW_KG, na.rm=TRUE),
            kg_high = sum(EPEST_HIGH_KG, na.rm=TRUE),
            ct_tox_bil_low = sum(ct_tox_bil_low, na.rm=TRUE),
            ct_tox_bil_high = sum(ct_tox_bil_high, na.rm=TRUE),
            or_tox_bil_low = sum(or_tox_bil_low, na.rm=TRUE),
            or_tox_bil_high = sum(or_tox_bil_high, na.rm=TRUE),
            cmpd_n_low = sum(!is.na(EPEST_LOW_KG)),
            cmpd_n_high = sum(!is.na(EPEST_HIGH_KG))) %>%
  full_join(census, by = c("YEAR", "fips")) %>%
  left_join(reg_key, by = "fips") 
Warning: Factor `cat` contains implicit NA, consider using
`forcats::fct_explicit_na`

Warning: Factor `cat` contains implicit NA, consider using
`forcats::fct_explicit_na`
str(pest_cty_final_class)
Classes 'grouped_df', 'tbl_df', 'tbl' and 'data.frame': 451786 obs. of  24 variables:
 $ YEAR             : int  1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 ...
 $ STATE_FIPS_CODE  : chr  "1" "1" "1" "1" ...
 $ COUNTY_FIPS_CODE : chr  "1" "1" "1" "1" ...
 $ fips             : chr  "01001" "01001" "01001" "01001" ...
 $ cat              : Factor w/ 1 level "I": 1 1 1 1 1 1 1 1 1 1 ...
 $ class_short      : Factor w/ 10 levels "BIO","CARB","FIP",..: 1 2 6 7 8 9 1 2 6 7 ...
 $ kg_low           : num  0 744.7 128 4201 5.1 ...
 $ kg_high          : num  4 1366.8 256.6 6387 5.2 ...
 $ ct_tox_bil_low   : num  0.00 2.73e+03 8.87 2.45e+04 4.44e-02 ...
 $ ct_tox_bil_high  : num  4.00e-02 5.74e+03 1.08e+02 3.27e+04 4.53e-02 ...
 $ or_tox_bil_low   : num  0 683.89 3.71 1494.87 0 ...
 $ or_tox_bil_high  : num  0.037 794.193 17.884 4635.601 0 ...
 $ cmpd_n_low       : int  0 5 2 15 1 6 0 4 2 15 ...
 $ cmpd_n_high      : int  1 6 4 19 1 8 1 6 4 19 ...
 $ crop_ha          : num  NA NA NA NA NA NA NA NA NA NA ...
 $ crop_ha_interp   : Factor w/ 2 levels "no","yes": NA NA NA NA NA NA NA NA NA NA ...
 $ past_ha          : num  NA NA NA NA NA NA NA NA NA NA ...
 $ past_ha_interp   : Factor w/ 2 levels "no","yes": NA NA NA NA NA NA NA NA NA NA ...
 $ trt_ins_ha       : num  NA NA NA NA NA NA NA NA NA NA ...
 $ trt_ins_ha_interp: Factor w/ 2 levels "no","yes": NA NA NA NA NA NA NA NA NA NA ...
 $ acres            : int  NA NA NA NA NA NA NA NA NA NA ...
 $ ha               : num  NA NA NA NA NA NA NA NA NA NA ...
 $ region_code      : chr  "6" "6" "6" "6" ...
 $ region_name      : Factor w/ 9 levels "Basin and Range",..: 9 9 9 9 9 9 9 9 9 9 ...
 - attr(*, "groups")=Classes 'tbl_df', 'tbl' and 'data.frame':  66961 obs. of  6 variables:
  ..$ YEAR            : int  1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 ...
  ..$ STATE_FIPS_CODE : chr  "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr  "1" "101" "103" "105" ...
  ..$ fips            : chr  "01001" "01101" "01103" "01105" ...
  ..$ cat             : Factor w/ 1 level "I": 1 1 1 1 1 1 1 1 1 1 ...
  ..$ .rows           :List of 66961
  .. ..$ : int  1 2 3 4 5 6
  .. ..$ : int  7 8 9 10 11 12
  .. ..$ : int  13 14 15 16 17 18 19
  .. ..$ : int  20 21 22 23 24 25
  .. ..$ : int  26 27 28 29 30 31
  .. ..$ : int  32 33 34 35 36 37
  .. ..$ : int  38 39 40 41 42 43
  .. ..$ : int  44 45 46 47 48 49
  .. ..$ : int  50 51 52 53 54 55
  .. ..$ : int  56 57 58 59 60 61
  .. ..$ : int  62 63 64 65 66 67
  .. ..$ : int  68 69 70 71 72 73
  .. ..$ : int  74 75 76 77 78 79
  .. ..$ : int  80 81 82 83 84 85
  .. ..$ : int  86 87 88 89 90 91
  .. ..$ : int  92 93 94 95 96 97
  .. ..$ : int  98 99 100 101 102 103
  .. ..$ : int  104 105 106 107 108 109
  .. ..$ : int  110 111 112 113 114 115
  .. ..$ : int  116 117 118 119 120 121 122
  .. ..$ : int  123 124 125 126 127 128
  .. ..$ : int  129 130 131 132 133 134
  .. ..$ : int  135 136 137 138 139 140
  .. ..$ : int  141 142 143 144 145 146
  .. ..$ : int  147 148 149 150 151 152
  .. ..$ : int  153 154 155 156 157 158
  .. ..$ : int  159 160 161 162 163 164
  .. ..$ : int  165 166 167 168 169 170
  .. ..$ : int  171 172 173 174 175 176
  .. ..$ : int  177 178 179 180 181 182
  .. ..$ : int  183 184 185 186 187 188 189
  .. ..$ : int  190 191 192 193 194 195
  .. ..$ : int  196 197 198 199 200 201
  .. ..$ : int  202 203 204 205 206 207
  .. ..$ : int  208 209 210 211 212 213
  .. ..$ : int  214 215 216 217 218 219
  .. ..$ : int  220 221 222 223 224 225
  .. ..$ : int  226 227 228 229 230 231
  .. ..$ : int  232 233 234 235 236 237
  .. ..$ : int  238 239 240 241 242 243
  .. ..$ : int  244 245 246 247 248 249
  .. ..$ : int  250 251 252 253 254 255
  .. ..$ : int  256 257 258 259 260 261
  .. ..$ : int  262 263 264 265 266 267
  .. ..$ : int  268 269 270 271 272 273 274
  .. ..$ : int  275 276 277 278 279 280
  .. ..$ : int  281 282 283 284 285 286
  .. ..$ : int  287 288 289 290 291 292
  .. ..$ : int  293 294 295 296 297 298
  .. ..$ : int  299 300 301 302 303 304
  .. ..$ : int  305 306 307 308 309 310
  .. ..$ : int  311 312 313 314 315 316
  .. ..$ : int  317 318 319 320 321 322
  .. ..$ : int  323 324 325 326 327 328
  .. ..$ : int  329 330 331 332 333 334 335
  .. ..$ : int  336 337 338 339 340 341 342
  .. ..$ : int  343 344 345 346 347 348
  .. ..$ : int  349 350 351 352 353 354 355
  .. ..$ : int  356 357 358 359 360 361
  .. ..$ : int  362 363 364 365 366 367
  .. ..$ : int  368 369 370 371 372 373 374
  .. ..$ : int  375 376 377 378 379 380
  .. ..$ : int  381 382 383 384 385 386
  .. ..$ : int  387 388 389 390 391 392 393
  .. ..$ : int  394 395 396 397 398 399
  .. ..$ : int  400 401 402 403 404 405
  .. ..$ : int  406 407 408 409 410 411
  .. ..$ : int  412 413 414 415
  .. ..$ : int  416 417 418 419 420 421 422
  .. ..$ : int  423 424 425 426
  .. ..$ : int  427 428 429 430 431 432 433
  .. ..$ : int  434 435 436 437 438 439 440
  .. ..$ : int  441 442 443 444 445 446
  .. ..$ : int  447 448 449 450 451 452 453
  .. ..$ : int  454 455 456 457 458 459 460
  .. ..$ : int  461 462 463 464 465
  .. ..$ : int  466 467 468 469 470 471 472
  .. ..$ : int  473 474 475 476 477 478 479
  .. ..$ : int  480 481 482 483 484 485
  .. ..$ : int  486 487 488 489 490 491 492
  .. ..$ : int  493 494 495 496 497 498 499
  .. ..$ : int  500 501 502 503 504 505 506
  .. ..$ : int  507 508 509 510 511 512 513
  .. ..$ : int  514 515 516 517 518
  .. ..$ : int  519 520 521 522 523 524
  .. ..$ : int  525 526 527 528 529 530 531
  .. ..$ : int  532 533 534 535 536 537
  .. ..$ : int  538 539 540 541 542 543
  .. ..$ : int  544 545 546 547 548 549
  .. ..$ : int  550 551 552 553 554 555
  .. ..$ : int  556 557 558 559 560 561 562
  .. ..$ : int  563 564 565 566 567 568 569
  .. ..$ : int  570 571 572 573 574
  .. ..$ : int  575 576 577 578 579 580 581
  .. ..$ : int  582 583 584 585 586 587 588
  .. ..$ : int  589 590 591 592 593 594 595
  .. ..$ : int  596 597 598 599 600 601 602
  .. ..$ : int  603 604 605 606 607 608
  .. ..$ : int  609 610 611 612 613 614 615
  .. .. [list output truncated]
  ..- attr(*, ".drop")= logi TRUE
# dataset without interpolation, but with source data intact
pest_cty_final_source <- pest_cty_detail %>%
  filter(cat=="I" & YEAR < 2014) %>%
  group_by(YEAR, STATE_FIPS_CODE, COUNTY_FIPS_CODE, fips, cat, source) %>%
  summarise(kg_low = sum(EPEST_LOW_KG, na.rm=TRUE),
            kg_high = sum(EPEST_HIGH_KG, na.rm=TRUE),
            ct_tox_bil_low = sum(ct_tox_bil_low, na.rm=TRUE),
            ct_tox_bil_high = sum(ct_tox_bil_high, na.rm=TRUE),
            or_tox_bil_low = sum(or_tox_bil_low, na.rm=TRUE),
            or_tox_bil_high = sum(or_tox_bil_high, na.rm=TRUE),
            cmpd_n_low = sum(!is.na(EPEST_LOW_KG)),
            cmpd_n_high = sum(!is.na(EPEST_HIGH_KG))) %>%
  full_join(census, by = c("YEAR", "fips")) %>%
  left_join(reg_key, by = "fips") 
Warning: Factor `cat` contains implicit NA, consider using
`forcats::fct_explicit_na`

Warning: Factor `cat` contains implicit NA, consider using
`forcats::fct_explicit_na`
str(pest_cty_final_source)
Classes 'grouped_df', 'tbl_df', 'tbl' and 'data.frame': 330976 obs. of  24 variables:
 $ YEAR             : int  1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 ...
 $ STATE_FIPS_CODE  : chr  "1" "1" "1" "1" ...
 $ COUNTY_FIPS_CODE : chr  "1" "1" "1" "1" ...
 $ fips             : chr  "01001" "01001" "01001" "01001" ...
 $ cat              : Factor w/ 1 level "I": 1 1 1 1 1 1 1 1 1 1 ...
 $ source           : Factor w/ 8 levels "cat_med","ecotox",..: 2 4 5 6 7 8 2 4 5 6 ...
 $ kg_low           : num  93.1 4937.7 356.1 87.4 0 ...
 $ kg_high          : num  93.1 6152.6 2002 97.7 47 ...
 $ ct_tox_bil_low   : num  1779.9 27641.1 6353.7 28.2 0 ...
 $ ct_tox_bil_high  : num  1779.9 33108.8 16606.7 49.1 11.5 ...
 $ or_tox_bil_low   : num  0 1635 422 571 0 ...
 $ or_tox_bil_high  : num  0 2575 3697 578 0 ...
 $ cmpd_n_low       : int  1 15 11 1 0 1 1 15 10 1 ...
 $ cmpd_n_high      : int  1 16 16 3 1 2 1 16 16 3 ...
 $ crop_ha          : num  NA NA NA NA NA NA NA NA NA NA ...
 $ crop_ha_interp   : Factor w/ 2 levels "no","yes": NA NA NA NA NA NA NA NA NA NA ...
 $ past_ha          : num  NA NA NA NA NA NA NA NA NA NA ...
 $ past_ha_interp   : Factor w/ 2 levels "no","yes": NA NA NA NA NA NA NA NA NA NA ...
 $ trt_ins_ha       : num  NA NA NA NA NA NA NA NA NA NA ...
 $ trt_ins_ha_interp: Factor w/ 2 levels "no","yes": NA NA NA NA NA NA NA NA NA NA ...
 $ acres            : int  NA NA NA NA NA NA NA NA NA NA ...
 $ ha               : num  NA NA NA NA NA NA NA NA NA NA ...
 $ region_code      : chr  "6" "6" "6" "6" ...
 $ region_name      : Factor w/ 9 levels "Basin and Range",..: 9 9 9 9 9 9 9 9 9 9 ...
 - attr(*, "groups")=Classes 'tbl_df', 'tbl' and 'data.frame':  66961 obs. of  6 variables:
  ..$ YEAR            : int  1992 1992 1992 1992 1992 1992 1992 1992 1992 1992 ...
  ..$ STATE_FIPS_CODE : chr  "1" "1" "1" "1" ...
  ..$ COUNTY_FIPS_CODE: chr  "1" "101" "103" "105" ...
  ..$ fips            : chr  "01001" "01101" "01103" "01105" ...
  ..$ cat             : Factor w/ 1 level "I": 1 1 1 1 1 1 1 1 1 1 ...
  ..$ .rows           :List of 66961
  .. ..$ : int  1 2 3 4 5 6
  .. ..$ : int  7 8 9 10 11 12
  .. ..$ : int  13 14 15 16 17 18 19
  .. ..$ : int  20 21 22 23 24 25
  .. ..$ : int  26 27 28 29 30 31
  .. ..$ : int  32 33 34 35 36 37
  .. ..$ : int  38 39 40 41 42 43
  .. ..$ : int  44 45 46 47
  .. ..$ : int  48 49 50 51 52 53
  .. ..$ : int  54 55 56 57
  .. ..$ : int  58 59 60 61 62 63
  .. ..$ : int  64 65 66 67
  .. ..$ : int  68 69 70 71 72 73
  .. ..$ : int  74 75 76 77 78 79
  .. ..$ : int  80 81 82 83 84 85
  .. ..$ : int  86 87 88 89
  .. ..$ : int  90 91 92 93 94 95
  .. ..$ : int  96 97 98 99 100 101
  .. ..$ : int  102 103 104 105 106 107
  .. ..$ : int  108 109 110 111 112
  .. ..$ : int  113 114 115 116 117 118
  .. ..$ : int  119 120 121 122
  .. ..$ : int  123 124 125 126 127 128
  .. ..$ : int  129 130 131 132 133 134
  .. ..$ : int  135 136 137 138 139 140
  .. ..$ : int  141 142 143 144 145 146
  .. ..$ : int  147 148 149 150
  .. ..$ : int  151 152 153 154
  .. ..$ : int  155 156 157 158 159 160
  .. ..$ : int  161 162 163 164 165 166
  .. ..$ : int  167 168 169 170 171 172 173
  .. ..$ : int  174 175 176 177 178 179
  .. ..$ : int  180 181 182 183
  .. ..$ : int  184 185 186 187 188 189
  .. ..$ : int  190 191 192 193 194 195
  .. ..$ : int  196 197 198 199 200 201
  .. ..$ : int  202 203 204 205 206 207
  .. ..$ : int  208 209 210 211 212 213
  .. ..$ : int  214 215 216 217 218 219
  .. ..$ : int  220 221 222 223 224 225
  .. ..$ : int  226 227 228 229 230 231
  .. ..$ : int  232 233 234 235 236 237
  .. ..$ : int  238 239 240 241 242 243
  .. ..$ : int  244 245 246 247 248 249
  .. ..$ : int  250 251 252 253 254 255 256
  .. ..$ : int  257 258 259 260 261 262
  .. ..$ : int  263 264 265 266 267 268
  .. ..$ : int  269 270 271 272 273 274
  .. ..$ : int  275 276 277 278 279 280
  .. ..$ : int  281 282 283 284 285 286
  .. ..$ : int  287 288 289 290
  .. ..$ : int  291 292 293 294 295 296
  .. ..$ : int  297 298 299 300
  .. ..$ : int  301 302 303 304 305 306
  .. ..$ : int  307 308 309 310 311 312 313
  .. ..$ : int  314 315 316 317 318 319 320
  .. ..$ : int  321 322 323 324 325 326
  .. ..$ : int  327 328 329 330 331 332 333
  .. ..$ : int  334 335 336 337 338 339
  .. ..$ : int  340 341 342 343 344 345
  .. ..$ : int  346 347 348 349 350 351 352
  .. ..$ : int  353 354 355 356 357 358
  .. ..$ : int  359 360 361 362 363 364
  .. ..$ : int  365 366 367 368 369 370 371
  .. ..$ : int  372 373 374 375 376 377
  .. ..$ : int  378 379 380 381 382 383
  .. ..$ : int  384 385 386 387 388 389
  .. ..$ : int  390 391 392 393
  .. ..$ : int  394 395 396 397 398
  .. ..$ : int  399 400 401 402
  .. ..$ : int  403 404 405 406 407
  .. ..$ : int  408 409 410 411 412
  .. ..$ : int  413 414 415 416 417
  .. ..$ : int  418 419 420 421 422
  .. ..$ : int  423 424 425 426
  .. ..$ : int  427 428 429 430
  .. ..$ : int  431 432 433 434 435
  .. ..$ : int  436 437 438 439 440
  .. ..$ : int  441 442 443 444 445 446
  .. ..$ : int  447 448 449 450 451
  .. ..$ : int  452 453 454 455 456
  .. ..$ : int  457 458 459 460 461
  .. ..$ : int  462 463 464 465 466
  .. ..$ : int  467 468 469
  .. ..$ : int  470 471 472 473
  .. ..$ : int  474 475 476 477 478
  .. ..$ : int  479 480 481 482
  .. ..$ : int  483 484 485 486 487 488
  .. ..$ : int  489 490 491 492 493 494
  .. ..$ : int  495 496 497 498 499 500
  .. ..$ : int  501 502 503 504 505
  .. ..$ : int  506 507 508 509 510
  .. ..$ : int  511 512 513 514
  .. ..$ : int  515 516 517 518 519
  .. ..$ : int  520 521 522 523 524
  .. ..$ : int  525 526 527 528 529
  .. ..$ : int  530 531 532 533 534
  .. ..$ : int  535 536 537 538
  .. ..$ : int  539 540 541 542
  .. .. [list output truncated]
  ..- attr(*, ".drop")= logi TRUE

Examine missing data

# select census year data and insecticide data, then find incomplete cases
pest_cty_miss <- pest_cty_final %>%
  filter(YEAR %in% c(1997,2002,2007,2012)) %>%
  filter(!complete.cases(.))

# counties with missing insecticide data in all years include
# Alpine Cty, CA (part of Yosemite National Park)
# Suffolk Cty, MA (Boston)
# Bronx, NY
# New York City
# Richmond Cty, NY (Staten Island)
# Arlington County, VA

Fill missing data

pest_cty_comp <- pest_cty_final %>%
  replace_na(replace = list(cat = "I", 
                            kg_low = 0, 
                            kg_high = 0,
                            ct_tox_bil_low = 0,
                            ct_tox_bil_high = 0,
                            or_tox_bil_low = 0,
                            or_tox_bil_high = 0,
                            cmpd_n_low = 0,
                            cmpd_n_high = 0,
                            interp_pest = "yes"))

Export data

write.csv(pest_cty_comp, "../output_big/bee_tox_index_cty.csv", row.names=FALSE)
write.csv(pest_cty_final_class, "../output_big/bee_tox_index_cty_class.csv", row.names=FALSE)
write.csv(pest_cty_final_source, "../output_big/bee_tox_index_cty_source.csv", row.names=FALSE)

Session information

sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.6

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] readxl_1.3.1    forcats_0.4.0   stringr_1.4.0   dplyr_0.8.3    
 [5] purrr_0.3.2     readr_1.3.1     tidyr_0.8.3     tibble_2.1.3   
 [9] ggplot2_3.2.0   tidyverse_1.2.1

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.1       cellranger_1.1.0 pillar_1.4.2     compiler_3.6.1  
 [5] tools_3.6.1      zeallot_0.1.0    digest_0.6.20    lubridate_1.7.4 
 [9] jsonlite_1.6     evaluate_0.14    nlme_3.1-140     gtable_0.3.0    
[13] lattice_0.20-38  pkgconfig_2.0.2  rlang_0.4.0      cli_1.1.0       
[17] rstudioapi_0.10  yaml_2.2.0       haven_2.1.1      xfun_0.8        
[21] withr_2.1.2      xml2_1.2.0       httr_1.4.0       knitr_1.23      
[25] vctrs_0.2.0      hms_0.5.0        generics_0.0.2   fs_1.3.1        
[29] grid_3.6.1       tidyselect_0.2.5 glue_1.3.1       R6_2.4.0        
[33] rmarkdown_1.14   modelr_0.1.4     magrittr_1.5     backports_1.1.4 
[37] scales_1.0.0     htmltools_0.3.6  rvest_0.3.4      assertthat_0.2.1
[41] colorspace_1.4-1 stringi_1.4.3    lazyeval_0.2.2   munsell_0.5.0   
[45] broom_0.5.2      crayon_1.3.4    

This R Markdown site was created with workflowr