Adjust Bee Tox Index for double cropping

Last updated: 2021-10-19

Purpose

This code adjusts the pesticide use indices to account for scenarios of double cropping (sequential planting of two crops in the same field within a growing season) for California only. In double-cropped fields, we assume the total pesticide use (both application rate and units of honey bee toxicity per acre) is a sum of average pesticide use on the component crops.

Libraries & functions

library(dplyr); library(stringr); library(tidyr)

Load & organize data

Aggregated pesticide data

The required datasets for this code are created by the script that calculates the bee toxicity index for state-year-crop combinations.

#load pesticide data grouped by category (insecticide, herbicide, etc)
by_cat <- read.csv("../output_big/bee_tox_index_state_yr_cat_20210625.csv") %>%
  filter(STATE_ALPHA=="CA")
by_compound <- read.csv("../output_big/bee_tox_index_state_yr_cmpd_20200609.csv")  %>%
  filter(STATE_ALPHA=="CA")

Separate double cropped classes into component crops

Load NASS CDL reclass table

#load crop data layer reclass table
#'USGS group' column matches 'crop' in the pesticide table
cdl_all <- read.csv("../keys/cdl_reclass_CA.csv") 
head(cdl_all)

  value Categories  USGS_group
1     1       Corn        Corn
2     2     Cotton      Cotton
3     3       Rice        Rice
4     4    Sorghum Other_crops
5     5   Soybeans    Soybeans
6     6  Sunflower Other_crops

Identify double crop classes & label component crops

dbl_crops <- filter(cdl_all, str_detect(Categories, pattern='Dbl Crop')) %>%
    mutate(Categories_tmp = gsub(Categories, pattern="Dbl Crop", replacement="")) %>%
    separate(col=Categories_tmp, into= c("Crop1", "Crop2"), sep ="/") %>%
    select(value, Categories, Crop1, Crop2, everything()) %>%
    mutate(Crop1 = trimws(Crop1), Crop2=trimws(Crop2))

#store names of double crops that are not in larger cdl dataset (altered name)
tofix_crop1 <- unique(dbl_crops$Crop1[!dbl_crops$Crop1 %in% cdl_all$Categories])
tofix_crop2 <- unique(dbl_crops$Crop2[!dbl_crops$Crop2 %in% cdl_all$Categories])

#correct crop names to match larger CDL table
dbl_crops <- mutate(dbl_crops, Crop1 = if_else(Crop1 == 'WinWht', 'Winter Wheat', 
                                       if_else(Crop1 == 'Durum Wht', "Durum Wheat", Crop1)),
                               Crop2 = if_else(Crop2 == 'Cantaloupe', 'Cantaloupes', 
                                       if_else(Crop2 == 'Durum Wht', "Durum Wheat", Crop2))
                    )

#check that all crop names (including double crops) exist in larger CDL dataset (value of 0 here means all names are correct)
which(!c(dbl_crops$Crop1, dbl_crops$Crop2) %in% cdl_all$Categories)

integer(0)

head(dbl_crops)

  value                  Categories        Crop1       Crop2 USGS_group
1    26    Dbl Crop WinWht/Soybeans Winter Wheat    Soybeans     Double
2   225        Dbl Crop WinWht/Corn Winter Wheat        Corn     Double
3   226          Dbl Crop Oats/Corn         Oats        Corn     Double
4   230  Dbl Crop Lettuce/Durum Wht      Lettuce Durum Wheat     Double
5   231 Dbl Crop Lettuce/Cantaloupe      Lettuce Cantaloupes     Double
6   232     Dbl Crop Lettuce/Cotton      Lettuce      Cotton     Double

Match double crops with ‘USGS group’

Convert to ‘long’ format to combine double crop classes with USGS group.

dbl_long <- select(dbl_crops, value, Categories, Crop1, Crop2) %>%
  rename(Categories.CDL = Categories) %>%
  tidyr::pivot_longer(-value & -Categories.CDL, 
                      values_to='CmpCrop', names_to='remove') %>%
  select(-remove)

#match new long format data to appropriate USGS group
dbl_long <- select(cdl_all, Categories, USGS_group) %>%
  right_join(dbl_long,by=c('Categories' = 'CmpCrop')) %>%
  rename(Categories.CmpCrop = Categories, USGS_group.CmpCrop = USGS_group) %>%
  select(value, Categories.CDL, Categories.CmpCrop, USGS_group.CmpCrop)
head(dbl_long)

  value           Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
2    26 Dbl Crop WinWht/Soybeans           Soybeans           Soybeans
3   225     Dbl Crop WinWht/Corn       Winter Wheat              Wheat
4   225     Dbl Crop WinWht/Corn               Corn               Corn
5   226       Dbl Crop Oats/Corn               Oats        Other_crops
6   226       Dbl Crop Oats/Corn               Corn               Corn

Combine double crops with appropriate pesticide data

#using left join filters data to only CDL classes that were double cropped
by_cat_dblcrop <- dbl_long %>%
  left_join(select(by_cat, -ha), by=c("USGS_group.CmpCrop" = 'crop'))
head(by_cat_dblcrop)

  value           Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
2    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
3    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
4    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
5    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
6    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
  Year      State STATE_ALPHA State_FIPS_code cat   kg_app        kg_ha
1 1997 California          CA               6   F   2270.0 9.643054e-03
2 1997 California          CA               6 FUM   1890.0 8.028798e-03
3 1997 California          CA               6   H 205022.4 8.709436e-01
4 1997 California          CA               6   I  22819.1 9.693648e-02
5 1997 California          CA               6   O   1085.5 4.611249e-03
6 1997 California          CA               6 PGR     16.7 7.094229e-05
  ld50_ct_ha  ld50_ct_tot ld50_or_ha  ld50_or_tot source
1         NA           NA         NA           NA   usgs
2         NA           NA         NA           NA   usgs
3         NA           NA         NA           NA   usgs
4  313302864 7.375231e+13  419715080 9.880202e+13   usgs
5         NA           NA         NA           NA   usgs
6         NA           NA         NA           NA   usgs

#using left join filters data to only CDL classes that were double cropped
by_cmpd_dblcrop <- dbl_long %>%
  left_join(select(by_compound, -ha), by=c("USGS_group.CmpCrop" = 'crop'))
head(by_cat_dblcrop)

  value           Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
2    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
3    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
4    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
5    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
6    26 Dbl Crop WinWht/Soybeans       Winter Wheat              Wheat
  Year      State STATE_ALPHA State_FIPS_code cat   kg_app        kg_ha
1 1997 California          CA               6   F   2270.0 9.643054e-03
2 1997 California          CA               6 FUM   1890.0 8.028798e-03
3 1997 California          CA               6   H 205022.4 8.709436e-01
4 1997 California          CA               6   I  22819.1 9.693648e-02
5 1997 California          CA               6   O   1085.5 4.611249e-03
6 1997 California          CA               6 PGR     16.7 7.094229e-05
  ld50_ct_ha  ld50_ct_tot ld50_or_ha  ld50_or_tot source
1         NA           NA         NA           NA   usgs
2         NA           NA         NA           NA   usgs
3         NA           NA         NA           NA   usgs
4  313302864 7.375231e+13  419715080 9.880202e+13   usgs
5         NA           NA         NA           NA   usgs
6         NA           NA         NA           NA   usgs

Sum pesticide values for component crops

For all pesticide data (LD50 values, kg applied, etc.), generates one value for each double crop type included in the CDL.

# sum double crop values
by_cat_dblcrop_summed <- group_by(by_cat_dblcrop, value, Categories.CDL, Year, State, 
                                  State_FIPS_code, STATE_ALPHA, cat) %>%
  summarise(kg_ha = sum(kg_ha, na.rm=T),
         ld50_ct_ha = sum(ld50_ct_ha, na.rm=T),
         ld50_or_ha = sum(ld50_or_ha, na.rm=T),
         n = n())
head(by_cat_dblcrop_summed)

# A tibble: 6 x 11
# Groups:   value, Categories.CDL, Year, State, State_FIPS_code,
#   STATE_ALPHA [1]
  value Categories.CDL  Year State State_FIPS_code STATE_ALPHA cat  
  <int> <fct>          <int> <fct>           <int> <fct>       <fct>
1    26 Dbl Crop WinW…  1997 Cali…               6 CA          F    
2    26 Dbl Crop WinW…  1997 Cali…               6 CA          FUM  
3    26 Dbl Crop WinW…  1997 Cali…               6 CA          H    
4    26 Dbl Crop WinW…  1997 Cali…               6 CA          I    
5    26 Dbl Crop WinW…  1997 Cali…               6 CA          O    
6    26 Dbl Crop WinW…  1997 Cali…               6 CA          PGR  
# … with 4 more variables: kg_ha <dbl>, ld50_ct_ha <dbl>,
#   ld50_or_ha <dbl>, n <int>

# generate source column to indicate source of estimates 
# (coded so least-solid source for each pair reported; and 'missing' for those w/ only one value)
by_cat_dblcrop_source <- select(by_cat_dblcrop, Categories.CmpCrop, value, Categories.CDL, Year, State, 
                                  State_FIPS_code, STATE_ALPHA, cat, source) %>%
  pivot_wider(names_from = Categories.CmpCrop, values_from = source) %>%
  unite("source_raw", 'Winter Wheat':Sorghum, remove = TRUE, na.rm=TRUE) %>%
  mutate(source = recode(source_raw,  
         usgs_usgs = "usgs",
         interp_interp = "interp",
         interp_interp_s = "interp_s",
         interp_s_interp = "interp_s",
         usgs_interp = "interp",
         interp_usgs = "interp",
         interp_s_usgs = "interp_s",
         usgs_interp_s = "interp_s",
         interp_s_interp_s = "interp_s",
         interp = "missing",
         interp_s = "missing",
         usgs = "missing")) %>%
  na_if("missing") %>%
  select(-source_raw)

by_cat_dblcrop_summed <- by_cat_dblcrop_summed %>%
  left_join(by_cat_dblcrop_source)

Joining, by = c("value", "Categories.CDL", "Year", "State", "State_FIPS_code", "STATE_ALPHA", "cat")

table(by_cat_dblcrop_summed$n, by_cat_dblcrop_summed$source)

   
         interp usgs
  1    5      0    0
  2    0    122 1231

# sum double crop values + join observations per row
by_cmpd_dblcrop_summed <- group_by(by_cmpd_dblcrop, value, Categories.CDL, Year, State, cmpd_usgs,State_FIPS_code, STATE_ALPHA, cat) %>%
  summarise(kg_ha = sum(kg_ha, na.rm=T),
         n = n())
head(by_cmpd_dblcrop_summed)

# A tibble: 6 x 10
# Groups:   value, Categories.CDL, Year, State, cmpd_usgs,
#   State_FIPS_code, STATE_ALPHA [6]
  value Categories.CDL  Year State cmpd_usgs State_FIPS_code STATE_ALPHA
  <int> <fct>          <int> <fct> <fct>               <int> <fct>      
1    26 Dbl Crop WinW…  1997 Cali… 24D                     6 CA         
2    26 Dbl Crop WinW…  1997 Cali… ALUMINUM…               6 CA         
3    26 Dbl Crop WinW…  1997 Cali… BARBAN                  6 CA         
4    26 Dbl Crop WinW…  1997 Cali… BROMOXYN…               6 CA         
5    26 Dbl Crop WinW…  1997 Cali… CALCIUMC…               6 CA         
6    26 Dbl Crop WinW…  1997 Cali… CAPTAN                  6 CA         
# … with 3 more variables: cat <fct>, kg_ha <dbl>, n <int>

Format reclass tables

To match other reclass data (insecticide toxic load)

# filter to only insecticide category & where both double-crops present
# also reorganize to match other reclass formatting and add surveyed/not column (all double crops surveyed in CA)
# rows with both double-crops present in dataset = surveyed, otherwise not
reclass <- filter(by_cat_dblcrop_summed, cat == 'I') %>%
  mutate(ld50_ct_ha_bil = ld50_ct_ha/10^9,
            ld50_or_ha_bil = ld50_or_ha/10^9,
         crop_usgs = "Double") %>%
    select(state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           Categories = Categories.CDL,
           crop_usgs,
           year = Year, 
           cat, 
           kg_ha,
           ld50_ct_ha_bil,
           ld50_or_ha_bil,
           n,
           source) %>%
    ungroup() %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
    mutate(unsurveyed = 0,
           kg_ha = ifelse(n==2, kg_ha, NA),
           ld50_ct_ha_bil = ifelse(n==2, ld50_ct_ha_bil, NA),
           ld50_or_ha_bil = ifelse(n==2, ld50_or_ha_bil, NA),
           noncrop = 0) %>%
    select(year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, ld50_ct_ha_bil, ld50_or_ha_bil, unsurveyed, noncrop, source) %>%
    arrange(year, state_alpha, value)

Adding missing grouping variables: `value`, `State`

head(reclass)

# A tibble: 6 x 12
   year state_alpha state_fips value Categories crop_usgs kg_ha
  <int> <fct>            <int> <int> <fct>      <chr>     <dbl>
1  1997 CA                   6    26 Dbl Crop … Double    NA   
2  1997 CA                   6   225 Dbl Crop … Double     1.48
3  1997 CA                   6   226 Dbl Crop … Double     2.24
4  1997 CA                   6   230 Dbl Crop … Double     2.43
5  1997 CA                   6   231 Dbl Crop … Double     4.66
6  1997 CA                   6   232 Dbl Crop … Double     5.95
# … with 5 more variables: ld50_ct_ha_bil <dbl>, ld50_or_ha_bil <dbl>,
#   unsurveyed <dbl>, noncrop <dbl>, source <chr>

table(reclass$year)


1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
  16   16   16   16   16   16   16   16   16   16   16   16   16   16   16 
2012 2013 2014 
  16   16   16

To match other reclass data (by category)

# also reorganize to match other reclass formatting and add surveyed/not column (all double crops surveyed in CA)
# rows with both double-crops present in dataset = included, otherwise NA
# split into herbicides, fungicides

reclass_cat <- filter(by_cat_dblcrop_summed, cat %in% c("H", "F")) %>%
  mutate(crop_usgs = "Double") %>%
      ungroup() %>%
    select(state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           value,
           Categories = Categories.CDL,
           crop_usgs,
           year = Year, 
           cat, 
           kg_ha,
           n,
           source) 

reclass_H <- filter(reclass_cat, cat == "H") %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
    mutate(n = replace_na(n, 0),
           unsurveyed = 0,
           kg_ha = ifelse(n==2, kg_ha, NA),
           noncrop = 0,
           cat = "H") %>%
    select(-n) %>%
    arrange(year, state_alpha, value) 
    
reclass_F <- filter(reclass_cat, cat == "F") %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
    mutate(n = replace_na(n, 0),
           unsurveyed = 0,
           kg_ha = ifelse(n==2, kg_ha, NA),
           noncrop = 0,
           cat = "F") %>%
    select(-n) %>%
    arrange(year, state_alpha, value) 

table(reclass_H$year)


1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
  16   16   16   16   16   16   16   16   16   16   16   16   16   16   16 
2012 2013 2014 2015 2016 2017 
  16   16   16   16   16   16

table(reclass_F$year)


1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
  16   16   16   16   16   16   16   16   16   16   16   16   16   16   16 
2012 2013 2014 2015 2016 2017 
  16   16   16   16   16   16

To match other reclass data (by compound data)

# filter to only where at least one double-crops present
# also reorganize to match other reclass formatting and add surveyed/not column

reclass_cmpd <- filter(by_cmpd_dblcrop_summed, n >= 1) %>%
  mutate(crop_usgs = "Double") %>%
    select(state_fips = State_FIPS_code, 
           state_alpha = STATE_ALPHA, 
           Categories = Categories.CDL,
           crop_usgs,
           year = Year, 
           cat, 
           cmpd_usgs,
           kg_ha,
           n) %>%
    ungroup() %>%
    mutate(Categories = as.character(Categories)) %>%
    complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
    mutate(unsurveyed = 0,
           noncrop = 0) %>%
    select(cmpd_usgs, year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, unsurveyed, noncrop) %>%
    arrange(cmpd_usgs, year, state_alpha, value)

Warning: Factor `State` contains implicit NA, consider using
`forcats::fct_explicit_na`

Warning: Factor `cmpd_usgs` contains implicit NA, consider using
`forcats::fct_explicit_na`

Warning: Factor `STATE_ALPHA` contains implicit NA, consider using
`forcats::fct_explicit_na`

Adding missing grouping variables: `value`, `State`

head(reclass_cmpd)

# A tibble: 6 x 10
  cmpd_usgs  year state_alpha state_fips value Categories crop_usgs   kg_ha
  <fct>     <int> <fct>            <int> <int> <chr>      <chr>       <dbl>
1 24D        1997 CA                   6    26 Dbl Crop … Double    0.288  
2 24D        1997 CA                   6   225 Dbl Crop … Double    0.341  
3 24D        1997 CA                   6   226 Dbl Crop … Double    0.318  
4 24D        1997 CA                   6   230 Dbl Crop … Double    0.294  
5 24D        1997 CA                   6   231 Dbl Crop … Double    0.0122 
6 24D        1997 CA                   6   232 Dbl Crop … Double    0.00611
# … with 2 more variables: unsurveyed <dbl>, noncrop <dbl>

table(reclass_cmpd$year)


1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
1863 2419 2349 2324 2166 2217 2199 2025 2077 2065 2205 2290 2404 2391 2464 
2012 2013 2014 2015 2016 2017 
2464 2598 2584 2620 2673 2609

Save double crop reclass table

This is the pesticide reclass table for double crops, and will be combined with the global reclass table.

#label output with today's date
date <- format(lubridate::today(), format='%Y%m%d')

filename <- paste0("./output_big/beetox_I_cdl_reclass_dblcrops_CA_",date,".csv")
write.csv(reclass, filename, row.names=FALSE)

filename_H <- paste0("./output_big/beetox_H_cdl_reclass_dblcrops_CA_",date,".csv")
write.csv(reclass_H, filename_H, row.names=FALSE)

filename_F <- paste0("./output_big/beetox_F_cdl_reclass_dblcrops_CA_",date,".csv")
write.csv(reclass_F, filename_F, row.names=FALSE)

filename_cmpd <- paste0("./output_big/beetox_cmpd_cdl_reclass_dblcrops_CA_",date,".csv")
write.csv(reclass_cmpd, filename_cmpd, row.names=FALSE)

Notes

Took slightly different approach with category sums vs. compound-specific data…
For category sums, only included double crop estimate if a numerical estimate was available for both crops in the double crop, made sure there was an entry for every CDL category, marking as NA if estimates were unavailable for one or more of the double crops.
For compound sums, included double crop estimate if a numerical estimate was available for at least one of the crops in the double crop (since some AIs may be used in one crop but not others). Did not ensure all CDL categories were represented.

Session information

sessionInfo()

R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.7

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] tidyr_1.1.0   stringr_1.4.0 dplyr_0.8.3  

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.6       knitr_1.23       magrittr_1.5     tidyselect_1.1.0
 [5] R6_2.4.0         rlang_0.4.7      fansi_0.4.0      tools_3.6.1     
 [9] xfun_0.8         utf8_1.1.4       cli_1.1.0        htmltools_0.3.6 
[13] ellipsis_0.2.0.1 yaml_2.2.0       assertthat_0.2.1 digest_0.6.20   
[17] tibble_2.1.3     lifecycle_0.2.0  crayon_1.3.4     purrr_0.3.2     
[21] vctrs_0.3.2      glue_1.3.1       evaluate_0.14    rmarkdown_1.14  
[25] stringi_1.4.3    compiler_3.6.1   pillar_1.4.2     pkgconfig_2.0.2

This R Markdown site was created with workflowr