Generate keys to convert land use to insecticide toxic load

Last updated: 2021-10-20

Purpose

This code generates reclassification tables to convert CDL land use types into estimated insecticide toxic load in units of lethal doses to honey bees, for a particular state and year.

Libraries & functions

library(tidyverse)

── Attaching packages ──────────────────────────────────────────────────── tidyverse 1.2.1 ──

✔ ggplot2 3.2.0     ✔ purrr   0.3.2
✔ tibble  2.1.3     ✔ dplyr   0.8.3
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0

── Conflicts ─────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Load data

# Key template with all CDL land use categories & corresponding USGS crop categories
cdl_key <- read.csv("../keys/cdl_reclass.csv") %>%
  select(value,
         Categories,
         crop = USGS_group) %>%
  mutate(crop = as.character(crop)) %>%
  filter(crop != "Double")

cdl_key_CA <- read.csv("../keys/cdl_reclass_CA.csv") %>%
  select(value,
         Categories,
         crop = USGS_group) %>%
  mutate(crop = as.character(crop)) %>%
  filter(crop != "Double")

# Bee tox/pesticide data
beetox <- read.csv("../output_big/bee_tox_index_state_yr_cat_20210625.csv")

# Double crop data
double <- read.csv("../output_big/beetox_I_cdl_reclass_dblcrops_20210625.csv")
double_CA <- read.csv("../output_big/beetox_I_cdl_reclass_dblcrops_CA_20210625.csv")

Organize, summarize, join datasets

# filter and summarize data for all states except California
# generate reclass table
data <- beetox %>%
  filter(cat=="I", STATE_ALPHA != "CA") %>% #choose only insecticides + exclude CA, years pre 2015
    mutate(ld50_ct_ha_bil = ld50_ct_ha/10^9,
            ld50_or_ha_bil = ld50_or_ha/10^9) %>%
    select(state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           year = Year, 
           cat, 
           crop, 
           kg_ha,
           ld50_ct_ha_bil,
           ld50_or_ha_bil,
           source) %>%
    ungroup() %>%
    mutate(crop = as.character(crop)) %>%
    full_join(cdl_key, by="crop") %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop)) %>%
    mutate(unsurveyed = ifelse(crop=="Not_surveyed",1,0),
           noncrop = ifelse(crop=="Non_crop",1,0)) %>%
    rename(crop_usgs = crop) %>%
    select(year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, ld50_ct_ha_bil, ld50_or_ha_bil, unsurveyed, noncrop, source) %>%
  bind_rows(double) %>%
  mutate(unsurveyed = ifelse(is.na(unsurveyed), 1, unsurveyed)) %>%
      arrange(year, state_alpha, value) %>%
  filter(year < 2015)

# filter and summarize data for California
# generate reclass table
data_CA <- beetox %>%
  filter(cat=="I", STATE_ALPHA == "CA") %>% #choose only insecticides + CA, years pre 2015
    mutate(ld50_ct_ha_bil = ld50_ct_ha/10^9,
            ld50_or_ha_bil = ld50_or_ha/10^9) %>%
    select(state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           year = Year, 
           cat, 
           crop, 
           kg_ha,
           ld50_ct_ha_bil,
           ld50_or_ha_bil,
           source) %>%
    ungroup() %>%
    mutate(crop = as.character(crop)) %>%
    full_join(cdl_key_CA, by="crop") %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop)) %>%
    drop_na(state_alpha, year) %>%
    mutate(unsurveyed = ifelse(crop=="Not_surveyed",1,0),
           noncrop = ifelse(crop=="Non_crop",1,0)) %>%
    rename(crop_usgs = crop) %>%
    select(year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, ld50_ct_ha_bil, ld50_or_ha_bil, unsurveyed, noncrop, source) %>%
  bind_rows(double_CA) %>%
  mutate(unsurveyed = ifelse(is.na(unsurveyed), 1, unsurveyed)) %>%
      arrange(year, state_alpha, value) %>%
  filter(year < 2015)

# combine into full dataset
table(data_CA$year) # check for correct number of rows per year


1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
 131  131  131  131  131  131  131  131  131  131  131  131  131  131  131 
2012 2013 2014 
 131  131  131

table(data$year)


1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
6272 6272 6272 6272 6272 6272 6272 6272 6272 6272 6272 6272 6272 6272 6272 
2012 2013 2014 
6272 6272 6272

data_full <- bind_rows(data, data_CA) %>% # bind rows
  filter(!is.na(year) & !is.na(state_fips)) 
table(data_full$year) # 131 x 48 states = 6288


1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
6288 6288 6288 6288 6288 6288 6288 6288 6288 6288 6288 6288 6288 6288 6288 
2012 2013 2014 
6288 6288 6288

summary(data_full)

      year      state_alpha          state_fips        value      
 Min.   :1997   Length:113184      Min.   : 1.00   Min.   :  1.0  
 1st Qu.:2001   Class :character   1st Qu.:18.75   1st Qu.: 43.0  
 Median :2006   Mode  :character   Median :30.50   Median : 77.0  
 Mean   :2006                      Mean   :30.19   Mean   :120.9  
 3rd Qu.:2010                      3rd Qu.:42.50   3rd Qu.:218.0  
 Max.   :2014                      Max.   :56.00   Max.   :254.0  
                                                                  
  Categories         crop_usgs             kg_ha       ld50_ct_ha_bil  
 Length:113184      Length:113184      Min.   : 0.00   Min.   :  0.00  
 Class :character   Class :character   1st Qu.: 0.09   1st Qu.:  0.86  
 Mode  :character   Mode  :character   Median : 0.70   Median :  4.87  
                                       Mean   : 1.44   Mean   :  8.83  
                                       3rd Qu.: 1.81   3rd Qu.: 11.32  
                                       Max.   :24.52   Max.   :338.59  
                                       NA's   :54684   NA's   :54684   
 ld50_or_ha_bil     unsurveyed        noncrop          source         
 Min.   :  0.00   Min.   :0.0000   Min.   :0.0000   Length:113184     
 1st Qu.:  1.35   1st Qu.:0.0000   1st Qu.:0.0000   Class :character  
 Median :  7.66   Median :0.0000   Median :0.0000   Mode  :character  
 Mean   : 12.26   Mean   :0.2635   Mean   :0.1527                     
 3rd Qu.: 17.81   3rd Qu.:1.0000   3rd Qu.:0.0000                     
 Max.   :287.83   Max.   :1.0000   Max.   :1.0000                     
 NA's   :54684

str(data_full)

Classes 'tbl_df', 'tbl' and 'data.frame':   113184 obs. of  12 variables:
 $ year          : int  1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 ...
 $ state_alpha   : chr  "AL" "AL" "AL" "AL" ...
 $ state_fips    : int  1 1 1 1 1 1 1 1 1 1 ...
 $ value         : int  1 2 3 4 5 6 10 11 12 13 ...
 $ Categories    : chr  "Corn" "Cotton" "Rice" "Sorghum" ...
 $ crop_usgs     : chr  "Corn" "Cotton" "Rice" "Other_crops" ...
 $ kg_ha         : num  0.0607 0.9348 NA 0.7594 0.0578 ...
 $ ld50_ct_ha_bil: num  0.38 4.976 NA 1.995 0.187 ...
 $ ld50_or_ha_bil: num  0.148 5.138 NA 2.753 0.392 ...
 $ unsurveyed    : num  0 0 0 0 0 0 0 0 0 1 ...
 $ noncrop       : num  0 0 0 0 0 0 0 0 0 0 ...
 $ source        : chr  "usgs" "usgs" NA "usgs" ...

Save reclass table

#label output with today's date
date <- format(lubridate::today(), format='%Y%m%d')
filename <- paste("../output_big/beetox_I_cdl_reclass",date,"csv", sep = ".")
write.csv(data_full, filename, row.names=FALSE)

Session information

sessionInfo()

R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.7

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] forcats_0.4.0   stringr_1.4.0   dplyr_0.8.3     purrr_0.3.2    
[5] readr_1.3.1     tidyr_1.1.0     tibble_2.1.3    ggplot2_3.2.0  
[9] tidyverse_1.2.1

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.6       cellranger_1.1.0 pillar_1.4.2     compiler_3.6.1  
 [5] tools_3.6.1      digest_0.6.20    lubridate_1.7.4  jsonlite_1.6    
 [9] evaluate_0.14    lifecycle_0.2.0  nlme_3.1-140     gtable_0.3.0    
[13] lattice_0.20-38  pkgconfig_2.0.2  rlang_0.4.7      cli_1.1.0       
[17] rstudioapi_0.10  yaml_2.2.0       haven_2.1.1      xfun_0.8        
[21] withr_2.1.2      xml2_1.2.0       httr_1.4.2       knitr_1.23      
[25] hms_0.5.0        generics_0.0.2   vctrs_0.3.2      grid_3.6.1      
[29] tidyselect_1.1.0 glue_1.3.1       R6_2.4.0         readxl_1.3.1    
[33] rmarkdown_1.14   modelr_0.1.4     magrittr_1.5     ellipsis_0.2.0.1
[37] backports_1.1.4  scales_1.0.0     htmltools_0.3.6  rvest_0.3.4     
[41] assertthat_0.2.1 colorspace_1.4-1 stringi_1.4.3    lazyeval_0.2.2  
[45] munsell_0.5.0    broom_0.5.2      crayon_1.3.4

This R Markdown site was created with workflowr

Generate keys to convert land use to insecticide toxic load - all in one file