Last updated: 2021-10-19

Purpose

This code adjusts the pesticide use indices to account for scenarios of double cropping (sequential planting of two crops in the same field within a growing season) for all states except California. In double-cropped fields, we assume the total pesticide use (both application rate and units of honey bee toxicity per acre) is a sum of average pesticide use on the component crops.

Libraries & functions

library(dplyr); library(stringr); library(tidyr)

Load & organize data

Aggregated pesticide data

The required datasets for this code are created by the script that calculates the bee toxicity index for state-year-crop combinations.

#load pesticide data grouped by category (insecticide, herbicide, etc)
by_cat <- read.csv("../output_big/bee_tox_index_state_yr_cat_20210625.csv") %>%
  filter(STATE_ALPHA!="CA")
by_compound <- read.csv("../output_big/bee_tox_index_state_yr_cmpd_20200609.csv")  %>%
  filter(STATE_ALPHA!="CA")

Separate double cropped classes into component crops

Load NASS CDL reclass table

#load crop data layer reclass table
#'USGS group' column matches 'crop' in the pesticide table
cdl_all <- read.csv("../keys/cdl_reclass.csv") 
head(cdl_all)
  value Categories  USGS_group
1     1       Corn        Corn
2     2     Cotton      Cotton
3     3       Rice        Rice
4     4    Sorghum Other_crops
5     5   Soybeans    Soybeans
6     6  Sunflower Other_crops

Identify double crop classes & label component crops

dbl_crops <- filter(cdl_all, str_detect(Categories, pattern='Dbl Crop')) %>%
    mutate(Categories_tmp = gsub(Categories, pattern="Dbl Crop", replacement="")) %>%
    separate(col=Categories_tmp, into= c("Crop1", "Crop2"), sep ="/") %>%
    select(value, Categories, Crop1, Crop2, everything()) %>%
    mutate(Crop1 = trimws(Crop1), Crop2=trimws(Crop2))

#store names of double crops that are not in larger cdl dataset (altered name)
tofix_crop1 <- unique(dbl_crops$Crop1[!dbl_crops$Crop1 %in% cdl_all$Categories])
tofix_crop2 <- unique(dbl_crops$Crop2[!dbl_crops$Crop2 %in% cdl_all$Categories])

#correct crop names to match larger CDL table
dbl_crops <- mutate(dbl_crops, Crop1 = if_else(Crop1 == 'WinWht', 'Winter Wheat', 
                                       if_else(Crop1 == 'Durum Wht', "Durum Wheat", Crop1)),
                               Crop2 = if_else(Crop2 == 'Cantaloupe', 'Cantaloupes', 
                                       if_else(Crop2 == 'Durum Wht', "Durum Wheat", Crop2))
                    )

#check that all crop names (including double crops) exist in larger CDL dataset (value of 0 here means all names are correct)
which(!c(dbl_crops$Crop1, dbl_crops$Crop2) %in% cdl_all$Categories)
integer(0)
head(dbl_crops)
  value                  Categories        Crop1       Crop2 USGS_group
1    26    Dbl Crop WinWht/Soybeans Winter Wheat    Soybeans     Double
2   225        Dbl Crop WinWht/Corn Winter Wheat        Corn     Double
3   226          Dbl Crop Oats/Corn         Oats        Corn     Double
4   230  Dbl Crop Lettuce/Durum Wht      Lettuce Durum Wheat     Double
5   231 Dbl Crop Lettuce/Cantaloupe      Lettuce Cantaloupes     Double
6   232     Dbl Crop Lettuce/Cotton      Lettuce      Cotton     Double

Match double crops with ‘USGS group’

Convert to ‘long’ format to combine double crop classes with USGS group.

dbl_long <- select(dbl_crops, value, Categories, Crop1, Crop2) %>%
  rename(Categories.CDL = Categories) %>%
  tidyr::pivot_longer(-value & -Categories.CDL, 
                      values_to='CmpCrop', names_to='remove') %>%
  select(-remove)

#match new long format data to appropriate USGS group
dbl_long <- select(cdl_all, Categories, USGS_group) %>%
  right_join(dbl_long,by=c('Categories' = 'CmpCrop')) %>%
  rename(Categories.CmpCrop = Categories, USGS_group.CmpCrop = USGS_group) %>%
  select(value, Categories.CDL, Categories.CmpCrop, USGS_group.CmpCrop)
head(dbl_long)
  value           Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
2    26 Dbl Crop WinWht/Soybeans           Soybeans           Soybeans
3   225     Dbl Crop WinWht/Corn       Winter Wheat              Wheat
4   225     Dbl Crop WinWht/Corn               Corn               Corn
5   226       Dbl Crop Oats/Corn               Oats       Not_surveyed
6   226       Dbl Crop Oats/Corn               Corn               Corn

Combine double crops with appropriate pesticide data

#using left join filters data to only CDL classes that were double cropped
by_cat_dblcrop <- dbl_long %>%
  left_join(select(by_cat, -ha), by=c("USGS_group.CmpCrop" = 'crop'))
head(by_cat_dblcrop)
  value           Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
2    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
3    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
4    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
5    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
6    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
  Year   State STATE_ALPHA State_FIPS_code cat  kg_app        kg_ha
1 1997 Alabama          AL               1   F 13394.4 0.3871593631
2 1997 Alabama          AL               1   H  7420.8 0.2144950279
3 1997 Alabama          AL               1   I   791.0 0.0228635143
4 1997 Arizona          AZ               4   F     2.9 0.0000684443
5 1997 Arizona          AZ               4   H 22341.2 0.5272854438
6 1997 Arizona          AZ               4   I 13903.9 0.3281526544
  ld50_ct_ha  ld50_ct_tot ld50_or_ha  ld50_or_tot source
1         NA           NA         NA           NA   usgs
2         NA           NA         NA           NA   usgs
3  158180098 5.472495e+12   55127026 1.907208e+12   usgs
4         NA           NA         NA           NA   usgs
5         NA           NA         NA           NA   usgs
6  207959712 8.811299e+12   47697700 2.020962e+12   usgs
#using left join filters data to only CDL classes that were double cropped
by_cmpd_dblcrop <- dbl_long  %>%
  left_join(select(by_compound, -ha), by=c("USGS_group.CmpCrop" = 'crop'))
head(by_cat_dblcrop)
  value           Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
2    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
3    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
4    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
5    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
6    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
  Year   State STATE_ALPHA State_FIPS_code cat  kg_app        kg_ha
1 1997 Alabama          AL               1   F 13394.4 0.3871593631
2 1997 Alabama          AL               1   H  7420.8 0.2144950279
3 1997 Alabama          AL               1   I   791.0 0.0228635143
4 1997 Arizona          AZ               4   F     2.9 0.0000684443
5 1997 Arizona          AZ               4   H 22341.2 0.5272854438
6 1997 Arizona          AZ               4   I 13903.9 0.3281526544
  ld50_ct_ha  ld50_ct_tot ld50_or_ha  ld50_or_tot source
1         NA           NA         NA           NA   usgs
2         NA           NA         NA           NA   usgs
3  158180098 5.472495e+12   55127026 1.907208e+12   usgs
4         NA           NA         NA           NA   usgs
5         NA           NA         NA           NA   usgs
6  207959712 8.811299e+12   47697700 2.020962e+12   usgs

Sum pesticide values for component crops

For all pesticide data (LD50 values, kg applied, etc.), generates one value for each double crop type included in the CDL.

# sum double crop values
by_cat_dblcrop_summed <- group_by(by_cat_dblcrop, value, Categories.CDL, Year, State, 
                                  State_FIPS_code, STATE_ALPHA, cat) %>%
  summarise(kg_ha = sum(kg_ha, na.rm=T),
         ld50_ct_ha = sum(ld50_ct_ha, na.rm=T),
         ld50_or_ha = sum(ld50_or_ha, na.rm=T),
         n = n())
head(by_cat_dblcrop_summed)
# A tibble: 6 x 11
# Groups:   value, Categories.CDL, Year, State, State_FIPS_code,
#   STATE_ALPHA [2]
  value Categories.CDL  Year State State_FIPS_code STATE_ALPHA cat  
  <int> <fct>          <int> <fct>           <int> <fct>       <fct>
1    26 Dbl Crop WinW…  1997 Alab…               1 AL          F    
2    26 Dbl Crop WinW…  1997 Alab…               1 AL          H    
3    26 Dbl Crop WinW…  1997 Alab…               1 AL          I    
4    26 Dbl Crop WinW…  1997 Alab…               1 AL          O    
5    26 Dbl Crop WinW…  1997 Alab…               1 AL          PGR  
6    26 Dbl Crop WinW…  1997 Ariz…               4 AZ          F    
# … with 4 more variables: kg_ha <dbl>, ld50_ct_ha <dbl>,
#   ld50_or_ha <dbl>, n <int>
# generate source column to indicate source of estimates 
# (coded so least-solid source for each pair reported; and 'missing' for those w/ only one value)
by_cat_dblcrop_source <- select(by_cat_dblcrop, Categories.CmpCrop, value, Categories.CDL, Year, State, 
                                  State_FIPS_code, STATE_ALPHA, cat, source) %>%
  pivot_wider(names_from = Categories.CmpCrop, values_from = source) %>%
  unite("source_raw", 'Winter Wheat':Sorghum, remove = TRUE, na.rm=TRUE) %>%
  mutate(source = recode(source_raw,  
         usgs_usgs = "usgs",
         interp_interp = "interp",
         interp_interp_s = "interp_s",
         interp_s_interp = "interp_s",
         usgs_interp = "interp",
         interp_usgs = "interp",
         interp_s_usgs = "interp_s",
         usgs_interp_s = "interp_s",
         interp_s_interp_s = "interp_s",
         interp = "missing",
         interp_s = "missing",
         usgs = "missing")) %>%
  na_if("missing") %>%
  select(-source_raw)

by_cat_dblcrop_summed <- by_cat_dblcrop_summed %>%
  left_join(by_cat_dblcrop_source)
Joining, by = c("value", "Categories.CDL", "Year", "State", "State_FIPS_code", "STATE_ALPHA", "cat")
table(by_cat_dblcrop_summed$n, by_cat_dblcrop_summed$source)
   
          interp interp_s  usgs
  1     2      0        0     0
  2     0   8005     4436 27426
# sum double crop values + join observations per row
by_cmpd_dblcrop_summed <- group_by(by_cmpd_dblcrop, value, Categories.CDL, Year, State, cmpd_usgs, State_FIPS_code, STATE_ALPHA, cat) %>%
  summarise(kg_ha = sum(kg_ha, na.rm=T),
         n = n())
head(by_cmpd_dblcrop_summed)
# A tibble: 6 x 10
# Groups:   value, Categories.CDL, Year, State, cmpd_usgs,
#   State_FIPS_code, STATE_ALPHA [6]
  value Categories.CDL  Year State cmpd_usgs State_FIPS_code STATE_ALPHA
  <int> <fct>          <int> <fct> <fct>               <int> <fct>      
1    26 Dbl Crop WinW…  1997 Alab… 24D                     1 AL         
2    26 Dbl Crop WinW…  1997 Alab… ACIFLUOR…               1 AL         
3    26 Dbl Crop WinW…  1997 Alab… ALACHLOR                1 AL         
4    26 Dbl Crop WinW…  1997 Alab… BENOMYL                 1 AL         
5    26 Dbl Crop WinW…  1997 Alab… BENTAZONE               1 AL         
6    26 Dbl Crop WinW…  1997 Alab… CAPTAN                  1 AL         
# … with 3 more variables: cat <fct>, kg_ha <dbl>, n <int>

Format reclass tables

To match other reclass data (insecticide toxic load)

# filter to only insecticide category & where both double-crops present & pre-2015 (ST present)
# also reorganize to match other reclass formatting and add surveyed/not column
# rows with both double-crops present in dataset = surveyed, otherwise not
reclass <- filter(by_cat_dblcrop_summed, cat == 'I', Year < 2015) %>%
  mutate(ld50_ct_ha_bil = ld50_ct_ha/10^9,
            ld50_or_ha_bil = ld50_or_ha/10^9,
         crop_usgs = "Double") %>%
    ungroup() %>%
    select(state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           value,
           Categories = Categories.CDL,
           crop_usgs,
           year = Year, 
           cat, 
           kg_ha,
           ld50_ct_ha_bil,
           ld50_or_ha_bil,
           n,
           source) %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
    mutate(unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
           kg_ha = ifelse(n==2, kg_ha, NA),
           ld50_ct_ha_bil = ifelse(n==2, ld50_ct_ha_bil, NA),
           ld50_or_ha_bil = ifelse(n==2, ld50_or_ha_bil, NA),
           noncrop = 0) %>%
    select(year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, ld50_ct_ha_bil, ld50_or_ha_bil, unsurveyed, noncrop, source) %>%
    arrange(year, state_alpha, value)

head(reclass) 
# A tibble: 6 x 12
   year state_alpha state_fips value Categories crop_usgs   kg_ha
  <int> <fct>            <int> <int> <fct>      <chr>       <dbl>
1  1997 AL                   1    26 Dbl Crop … Double     0.0807
2  1997 AL                   1   225 Dbl Crop … Double     0.0836
3  1997 AL                   1   226 Dbl Crop … Double    NA     
4  1997 AL                   1   230 Dbl Crop … Double     4.85  
5  1997 AL                   1   231 Dbl Crop … Double     9.65  
6  1997 AL                   1   232 Dbl Crop … Double     5.76  
# … with 5 more variables: ld50_ct_ha_bil <dbl>, ld50_or_ha_bil <dbl>,
#   unsurveyed <dbl>, noncrop <dbl>, source <chr>
table(reclass$year) # 47 states x 16 double crops = 752

1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
 752  752  752  752  752  752  752  752  752  752  752  752  752  752  752 
2012 2013 2014 
 752  752  752 

To match other reclass data (by category)

# filter to where both double-crops present
# also reorganize to match other reclass formatting and add surveyed/not column
# rows with both double-crops present in dataset = included, otherwise NA
# split into herbicides, fungicides

reclass_cat <- filter(by_cat_dblcrop_summed, cat %in% c("H", "F")) %>%
  mutate(crop_usgs = "Double") %>%
      ungroup() %>%
    select(state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           value,
           Categories = Categories.CDL,
           crop_usgs,
           year = Year, 
           cat, 
           kg_ha,
           n,
           source) 

reclass_H <- filter(reclass_cat, cat == "H") %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
    mutate(n = replace_na(n, 0),
           unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
           kg_ha = ifelse(n==2, kg_ha, NA),
           noncrop = 0,
           cat = "H") %>%
    select(-n) %>%
    arrange(year, state_alpha, value) 
    
reclass_F <- filter(reclass_cat, cat == "F") %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
    mutate(n = replace_na(n, 0),
           unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
           kg_ha = ifelse(n==2, kg_ha, NA),
           noncrop = 0,
           cat = "F") %>%
    select(-n) %>%
    arrange(year, state_alpha, value) 

table(reclass_H$year) # 47 states x 16 double crops = 752

1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
 752  752  752  752  752  752  752  752  752  752  752  752  752  752  752 
2012 2013 2014 2015 2016 2017 
 752  752  752  752  752  752 
table(reclass_F$year)

1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
 752  752  752  752  752  752  752  752  752  752  752  752  752  752  752 
2012 2013 2014 2015 2016 2017 
 752  752  752  752  752  752 
head(reclass_H)
# A tibble: 6 x 11
  state_alpha state_fips  year Categories value crop_usgs cat   kg_ha
  <fct>            <int> <int> <fct>      <int> <chr>     <chr> <dbl>
1 AL                   1  1997 Dbl Crop …    26 Double    H      1.82
2 AL                   1  1997 Dbl Crop …   225 Double    H      3.91
3 AL                   1  1997 Dbl Crop …   226 Double    H     NA   
4 AL                   1  1997 Dbl Crop …   230 Double    H     19.3 
5 AL                   1  1997 Dbl Crop …   231 Double    H     38.2 
6 AL                   1  1997 Dbl Crop …   232 Double    H     23.5 
# … with 3 more variables: source <chr>, unsurveyed <dbl>, noncrop <dbl>
head(reclass_F)
# A tibble: 6 x 11
  state_alpha state_fips  year Categories value crop_usgs cat    kg_ha
  <fct>            <int> <int> <fct>      <int> <chr>     <chr>  <dbl>
1 AL                   1  1997 Dbl Crop …    26 Double    F      0.398
2 AL                   1  1997 Dbl Crop …   225 Double    F      0.390
3 AL                   1  1997 Dbl Crop …   226 Double    F     NA    
4 AL                   1  1997 Dbl Crop …   230 Double    F     21.9  
5 AL                   1  1997 Dbl Crop …   231 Double    F     43.0  
6 AL                   1  1997 Dbl Crop …   232 Double    F     21.8  
# … with 3 more variables: source <chr>, unsurveyed <dbl>, noncrop <dbl>

To match other reclass data (compound data)

#filter to only where both double-crops are present
# also reorganize to match other reclass formatting and add surveyed/not column

reclass_cmpd <- filter(by_cmpd_dblcrop_summed, n >= 1) %>%
  mutate(crop_usgs = "Double") %>%
    select(state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           Categories = Categories.CDL,
           crop_usgs,
           year = Year, 
           cat, 
           cmpd_usgs,
           kg_ha,
           n) %>%
    ungroup() %>%
    mutate(Categories = as.character(Categories)) %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
    mutate(unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
           noncrop = 0) %>%
    select(cmpd_usgs, year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, unsurveyed, noncrop) %>%
    arrange(cmpd_usgs, year, state_alpha, value)
Adding missing grouping variables: `value`, `State`
head(reclass_cmpd)
# A tibble: 6 x 10
  cmpd_usgs  year state_alpha state_fips value Categories crop_usgs  kg_ha
  <fct>     <int> <fct>            <int> <int> <chr>      <chr>      <dbl>
1 24D        1997 AL                   1    26 Dbl Crop … Double    0.183 
2 24D        1997 AL                   1   225 Dbl Crop … Double    0.197 
3 24D        1997 AL                   1   226 Dbl Crop … Double    0.0138
4 24D        1997 AL                   1   230 Dbl Crop … Double    0.183 
5 24D        1997 AL                   1   233 Dbl Crop … Double    0.0974
6 24D        1997 AL                   1   234 Dbl Crop … Double    0.280 
# … with 2 more variables: unsurveyed <dbl>, noncrop <dbl>
table(reclass_cmpd$year)

 1997  1998  1999  2000  2001  2002  2003  2004  2005  2006  2007  2008 
37541 35554 35365 36081 35682 35628 35163 37393 36167 37791 37092 38068 
 2009  2010  2011  2012  2013  2014  2015  2016  2017 
40884 42309 42540 43247 45493 46001 41553 40689 40163 

Save double crop reclass tables

This is the pesticide reclass table for double crops, and will be combined with the global reclass table.

#label output with today's date
date <- format(lubridate::today(), format='%Y%m%d')
filename <- paste0("./output_big/beetox_I_cdl_reclass_dblcrops_",date,".csv")
write.csv(reclass, filename, row.names=FALSE)

filename_H <- paste0("./output_big/beetox_H_cdl_reclass_dblcrops_",date,".csv")
write.csv(reclass_H, filename_H, row.names=FALSE)

filename_F <- paste0("./output_big/beetox_F_cdl_reclass_dblcrops_",date,".csv")
write.csv(reclass_F, filename_F, row.names=FALSE)

filename_cmpd <- paste0("./output_big/beetox_cmpd_cdl_reclass_dblcrops_",date,".csv")
write.csv(reclass_cmpd, filename_cmpd, row.names=FALSE)

Notes

  • Took slightly different approach with category sums vs. compound-specific data…
  • For category sums, only included double crop estimate if a numerical estimate was available for both crops in the double crop, made sure there was an entry for every CDL category, marking as NA if estimates were unavailable for one or more of the double crops.
  • For compound sums, included double crop estimate if a numerical estimate was available for at least one of the crops in the double crop (since some AIs may be used in one crop but not others). Did not ensure all CDL categories were represented.
  • In both cases, ‘unsurveyed’ column = 0.5 for those double crops including oats, which was not surveyed (all other crops in double crops included in Kynetec survey)

Session information

sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.7

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] tidyr_1.1.0   stringr_1.4.0 dplyr_0.8.3  

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.6       knitr_1.23       magrittr_1.5     tidyselect_1.1.0
 [5] R6_2.4.0         rlang_0.4.7      fansi_0.4.0      tools_3.6.1     
 [9] xfun_0.8         utf8_1.1.4       cli_1.1.0        htmltools_0.3.6 
[13] ellipsis_0.2.0.1 yaml_2.2.0       assertthat_0.2.1 digest_0.6.20   
[17] tibble_2.1.3     lifecycle_0.2.0  crayon_1.3.4     purrr_0.3.2     
[21] vctrs_0.3.2      glue_1.3.1       evaluate_0.14    rmarkdown_1.14  
[25] stringi_1.4.3    compiler_3.6.1   pillar_1.4.2     pkgconfig_2.0.2 

This R Markdown site was created with workflowr