Last updated: 2021-10-20

Purpose

This code generates reclassification tables to convert the CDL land use types into pesticide intensity for a particular pesticide compound.

Libraries & functions

library(tidyverse)
── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.2.0     ✔ purrr   0.3.2
✔ tibble  2.1.3     ✔ dplyr   0.8.3
✔ tidyr   1.1.0     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Load data

# Key template with all CDL land use categories & corresponding USGS crop categories
cdl_key <- read.csv("../keys/cdl_reclass.csv") %>%
  select(value,
         Categories,
         crop = USGS_group) %>%
  mutate(crop = as.character(crop)) %>%
  filter(crop != "Double")

cdl_key_orig <- read.csv("../keys/cdl_reclass.csv") %>%
  select(value,
         Categories,
         crop = USGS_group) %>%
  mutate(crop = as.character(crop))

cdl_key_CA <- read.csv("../keys/cdl_reclass_CA.csv") %>%
  select(value,
         Categories,
         crop = USGS_group) %>%
  mutate(crop = as.character(crop)) %>%
  filter(crop != "Double")

# Bee tox/pesticide data
beetox <- read.csv("../output_big/bee_tox_index_state_yr_cmpd_20200609.csv")

# Double crop data
double <- read.csv("../output_big/beetox_cmpd_cdl_reclass_dblcrops_20210601.csv")
double_CA <- read.csv("../output_big/beetox_cmpd_cdl_reclass_dblcrops_CA_20210601.csv")

Organize, summarize, join datasets

California

# filter and summarize data for California
# generate reclass table
data_CA <- beetox %>%
  filter(STATE_ALPHA == "CA") %>% #select appropriate rows + CA
    select(cmpd_usgs,
           state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           year = Year, 
           cat, 
           crop, 
           kg_ha) %>%
    ungroup() %>%
    mutate(crop = as.character(crop)) %>%
    full_join(cdl_key_CA, by="crop") %>%
    complete(cmpd_usgs, nesting(state_alpha, state_fips), year, nesting(Categories, value, crop)) %>%
    drop_na(state_alpha, year) %>%
    rename(crop_usgs = crop) %>%
    select(cmpd_usgs, year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha) %>%
  bind_rows(double_CA) %>%
  complete(cmpd_usgs, nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
  drop_na(cmpd_usgs, state_alpha, year) %>%
  mutate(unsurveyed = ifelse(crop_usgs=="Not_surveyed",1,0),
           noncrop = ifelse(crop_usgs=="Non_crop",1,0)) %>%
  #mutate(unsurveyed = ifelse(is.na(unsurveyed), 0, unsurveyed),
         #noncrop = ifelse(is.na(noncrop), 0, noncrop)) %>%  #fills in noncrop for missing double crops
      arrange(cmpd_usgs, year, state_alpha, value) 

All other states

# filter and summarize data for all states except California
# generate reclass table
# this is a pretty slow operation
data <- beetox %>%
  filter(STATE_ALPHA != "CA") %>% #select appropriate rows (exclude CA)
    select(cmpd_usgs,
           state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           year = Year, 
           cat, 
           crop, 
           kg_ha) %>%
    ungroup() %>%
    mutate(crop = as.character(crop)) %>%
    full_join(cdl_key, by="crop") %>%
    complete(cmpd_usgs, nesting(state_alpha, state_fips), year, nesting(Categories, value, crop)) %>%
    drop_na(state_alpha, year) %>%
    rename(crop_usgs = crop) %>%
    select(cmpd_usgs, year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha) %>%
  bind_rows(double) %>%
  complete(cmpd_usgs, nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
  drop_na(cmpd_usgs, state_alpha, year) %>%
  mutate(unsurveyed = ifelse(crop_usgs=="Not_surveyed", 1,
                             ifelse(str_detect(Categories, "Oats"), 0.5, 0)),
         noncrop = ifelse(crop_usgs=="Non_crop",1,0)) %>%
 # mutate(unsurveyed = ifelse(is.na(unsurveyed), 0, unsurveyed),
         #unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
        # noncrop = ifelse(is.na(noncrop), 0, noncrop)) %>%  #fills in noncrop for missing double crops
      arrange(cmpd_usgs, year, state_alpha, value) 

Combine into full dataset + check

# combine
data_full <- bind_rows(data, data_CA) %>% # bind rows
  filter(!is.na(year) & !is.na(state_fips) & !is.na(cmpd_usgs))

# check overall structure and number of rows
summary(data_full)
  cmpd_usgs         state_alpha          state_fips         year     
 Length:67080384    Length:67080384    Min.   : 1.00   Min.   :1997  
 Class :character   Class :character   1st Qu.:18.75   1st Qu.:2002  
 Mode  :character   Mode  :character   Median :30.50   Median :2007  
                                       Mean   :30.19   Mean   :2007  
                                       3rd Qu.:42.50   3rd Qu.:2012  
                                       Max.   :56.00   Max.   :2017  
                                                                     
  Categories            value        crop_usgs             kg_ha         
 Length:67080384    Min.   :  1.0   Length:67080384    Min.   :  0       
 Class :character   1st Qu.: 43.0   Class :character   1st Qu.:  0       
 Mode  :character   Median : 77.0   Mode  :character   Median :  0       
                    Mean   :120.9                      Mean   :  0       
                    3rd Qu.:218.0                      3rd Qu.:  0       
                    Max.   :254.0                      Max.   :731       
                                                       NA's   :62751926  
   unsurveyed        noncrop      
 Min.   :0.0000   Min.   :0.0000  
 1st Qu.:0.0000   1st Qu.:0.0000  
 Median :0.0000   Median :0.0000  
 Mean   :0.2635   Mean   :0.1527  
 3rd Qu.:1.0000   3rd Qu.:0.0000  
 Max.   :1.0000   Max.   :1.0000  
                                  
str(data_full)
Classes 'tbl_df', 'tbl' and 'data.frame':   67080384 obs. of  10 variables:
 $ cmpd_usgs  : chr  "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" ...
 $ state_alpha: chr  "AL" "AL" "AL" "AL" ...
 $ state_fips : int  1 1 1 1 1 1 1 1 1 1 ...
 $ year       : int  1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 ...
 $ Categories : chr  "Corn" "Cotton" "Rice" "Sorghum" ...
 $ value      : int  1 2 3 4 5 6 10 11 12 13 ...
 $ crop_usgs  : chr  "Corn" "Cotton" "Rice" "Other_crops" ...
 $ kg_ha      : num  NA NA NA NA NA NA NA NA NA NA ...
 $ unsurveyed : num  0 0 0 0 0 0 0 0 0 1 ...
 $ noncrop    : num  0 0 0 0 0 0 0 0 0 0 ...
head(table(data_full$year)) # 131 categories x 48 states x 508 cmpds = 3194304

   1997    1998    1999    2000    2001    2002 
3194304 3194304 3194304 3194304 3194304 3194304 
head(table(data_full$cmpd_usgs)) # 131 x 48 states x 21 years = 132048

1METHYLCYCLOPROPENE                 24D                24DB 
             132048              132048              132048 
     6BENZYLADENINE           ABAMECTIN        ABSCISICACID 
             132048              132048              132048 
# check distribution of values
head(table(data_full$noncrop, data_full$crop_usgs))
   
     Alfalfa     Corn   Cotton   Double Non_crop Not_surveyed
  0   512064   512064   512064  8193024        0     17175480
  1        0        0        0        0 10241280            0
   
    Orchards_and_grapes Other_crops Pasture_and_hay     Rice Soybeans
  0             7190232     4715256         2560320   512064   512064
  1                   0           0               0        0        0
   
    Vegetables_and_fruit    Wheat
  0             12908280  1536192
  1                    0        0
head(table(data_full$unsurveyed, data_full$crop_usgs))
     
       Alfalfa     Corn   Cotton   Double Non_crop Not_surveyed
  0     512064   512064   512064  7190232 10241280            0
  0.5        0        0        0  1002792        0            0
  1          0        0        0        0        0     17175480
     
      Orchards_and_grapes Other_crops Pasture_and_hay     Rice Soybeans
  0               7190232     4715256         2560320   512064   512064
  0.5                   0           0               0        0        0
  1                     0           0               0        0        0
     
      Vegetables_and_fruit    Wheat
  0               12908280  1536192
  0.5                    0        0
  1                      0        0
# check distribution of presence/absence
# total possible state-years = 48*21 = 1008
summary <- data_full %>%
  filter(noncrop==0, crop_usgs!="Not_surveyed") %>%
  mutate(present = ifelse(is.na(kg_ha), 0, 1),
         cdl = paste(value, "_", Categories)) %>%
  select(cmpd_usgs, cdl, crop_usgs, present) %>%
  group_by(cmpd_usgs, cdl, crop_usgs) %>%
  summarise(present = mean(present),
            n = n()) 

## heatmap of distribution

# save pesticide classes
class_key <- beetox %>%
  select(cmpd_usgs, cat) %>%
  unique()

summary <- left_join(summary, class_key, by = "cmpd_usgs")

insecticides <- ggplot(filter(summary, cat=="I"), aes(x = cdl, y = cmpd_usgs)) +
  geom_tile(aes(fill = present*100)) +
  scale_fill_continuous(low = "white", high = "blue") +
  facet_grid(. ~ crop_usgs, scales = "free_x", space = "free_x") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  ggtitle("Insecticides") +
  xlab("") +
  ylab("") +
  labs(fill = "State-years \n present (%)")

fungicides <- ggplot(filter(summary, cat=="F"), aes(x = cdl, y = cmpd_usgs)) +
  geom_tile(aes(fill = present*100)) +
  scale_fill_continuous(low = "white", high = "blue") +
  facet_grid(. ~ crop_usgs, scales = "free_x", space = "free_x") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  ggtitle("Fungicides") +
  xlab("") +
  ylab("") +
  labs(fill = "State-years \n present (%)")

herbicides <- ggplot(filter(summary, cat=="H"), aes(x = cdl, y = cmpd_usgs)) +
  geom_tile(aes(fill = present*100)) +
  scale_fill_continuous(low = "white", high = "blue") +
  facet_grid(. ~ crop_usgs, scales = "free_x", space = "free_x") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  ggtitle("Herbicides") +
  xlab("") +
  ylab("") +
  labs(fill = "State-years \n present (%)")

Save reclass tables

# label output with today's date
date <- format(lubridate::today(), format='%Y%m%d')

# create folder for reclass tables to live
folderpath <- paste("../output_big/beetox_cmpd_cdl_reclass",date, sep = "_")
    #create output directory if it doesn't already exist
    if (!dir.exists(folderpath)) {
      dir.create(folderpath)
    }

# split tables by compound
data_full <- ungroup(data_full)
data_list <- split(data_full, as.factor(data_full$cmpd_usgs))
nam <- ls.str(data_list)

walk2(data_list, nam, ~write_csv(.x, paste0(folderpath,"/",.y, ".csv")))

Session information

sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.7

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] forcats_0.4.0   stringr_1.4.0   dplyr_0.8.3     purrr_0.3.2    
[5] readr_1.3.1     tidyr_1.1.0     tibble_2.1.3    ggplot2_3.2.0  
[9] tidyverse_1.2.1

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.6       cellranger_1.1.0 pillar_1.4.2     compiler_3.6.1  
 [5] tools_3.6.1      digest_0.6.20    lubridate_1.7.4  jsonlite_1.6    
 [9] evaluate_0.14    lifecycle_0.2.0  nlme_3.1-140     gtable_0.3.0    
[13] lattice_0.20-38  pkgconfig_2.0.2  rlang_0.4.7      cli_1.1.0       
[17] rstudioapi_0.10  yaml_2.2.0       haven_2.1.1      xfun_0.8        
[21] withr_2.1.2      xml2_1.2.0       httr_1.4.2       knitr_1.23      
[25] hms_0.5.0        generics_0.0.2   vctrs_0.3.2      grid_3.6.1      
[29] tidyselect_1.1.0 glue_1.3.1       R6_2.4.0         readxl_1.3.1    
[33] rmarkdown_1.14   modelr_0.1.4     magrittr_1.5     ellipsis_0.2.0.1
[37] backports_1.1.4  scales_1.0.0     htmltools_0.3.6  rvest_0.3.4     
[41] assertthat_0.2.1 colorspace_1.4-1 stringi_1.4.3    lazyeval_0.2.2  
[45] munsell_0.5.0    broom_0.5.2      crayon_1.3.4    

This R Markdown site was created with workflowr