Last updated: 2021-10-19
This code adjusts the pesticide use indices to account for scenarios of double cropping (sequential planting of two crops in the same field within a growing season) for all states except California. In double-cropped fields, we assume the total pesticide use (both application rate and units of honey bee toxicity per acre) is a sum of average pesticide use on the component crops.
library(dplyr); library(stringr); library(tidyr)
The required datasets for this code are created by the script that calculates the bee toxicity index for state-year-crop combinations.
#load pesticide data grouped by category (insecticide, herbicide, etc)
by_cat <- read.csv("../output_big/bee_tox_index_state_yr_cat_20210625.csv") %>%
filter(STATE_ALPHA!="CA")
by_compound <- read.csv("../output_big/bee_tox_index_state_yr_cmpd_20200609.csv") %>%
filter(STATE_ALPHA!="CA")
#load crop data layer reclass table
#'USGS group' column matches 'crop' in the pesticide table
cdl_all <- read.csv("../keys/cdl_reclass.csv")
head(cdl_all)
value Categories USGS_group
1 1 Corn Corn
2 2 Cotton Cotton
3 3 Rice Rice
4 4 Sorghum Other_crops
5 5 Soybeans Soybeans
6 6 Sunflower Other_crops
dbl_crops <- filter(cdl_all, str_detect(Categories, pattern='Dbl Crop')) %>%
mutate(Categories_tmp = gsub(Categories, pattern="Dbl Crop", replacement="")) %>%
separate(col=Categories_tmp, into= c("Crop1", "Crop2"), sep ="/") %>%
select(value, Categories, Crop1, Crop2, everything()) %>%
mutate(Crop1 = trimws(Crop1), Crop2=trimws(Crop2))
#store names of double crops that are not in larger cdl dataset (altered name)
tofix_crop1 <- unique(dbl_crops$Crop1[!dbl_crops$Crop1 %in% cdl_all$Categories])
tofix_crop2 <- unique(dbl_crops$Crop2[!dbl_crops$Crop2 %in% cdl_all$Categories])
#correct crop names to match larger CDL table
dbl_crops <- mutate(dbl_crops, Crop1 = if_else(Crop1 == 'WinWht', 'Winter Wheat',
if_else(Crop1 == 'Durum Wht', "Durum Wheat", Crop1)),
Crop2 = if_else(Crop2 == 'Cantaloupe', 'Cantaloupes',
if_else(Crop2 == 'Durum Wht', "Durum Wheat", Crop2))
)
#check that all crop names (including double crops) exist in larger CDL dataset (value of 0 here means all names are correct)
which(!c(dbl_crops$Crop1, dbl_crops$Crop2) %in% cdl_all$Categories)
integer(0)
head(dbl_crops)
value Categories Crop1 Crop2 USGS_group
1 26 Dbl Crop WinWht/Soybeans Winter Wheat Soybeans Double
2 225 Dbl Crop WinWht/Corn Winter Wheat Corn Double
3 226 Dbl Crop Oats/Corn Oats Corn Double
4 230 Dbl Crop Lettuce/Durum Wht Lettuce Durum Wheat Double
5 231 Dbl Crop Lettuce/Cantaloupe Lettuce Cantaloupes Double
6 232 Dbl Crop Lettuce/Cotton Lettuce Cotton Double
Convert to ‘long’ format to combine double crop classes with USGS group.
dbl_long <- select(dbl_crops, value, Categories, Crop1, Crop2) %>%
rename(Categories.CDL = Categories) %>%
tidyr::pivot_longer(-value & -Categories.CDL,
values_to='CmpCrop', names_to='remove') %>%
select(-remove)
#match new long format data to appropriate USGS group
dbl_long <- select(cdl_all, Categories, USGS_group) %>%
right_join(dbl_long,by=c('Categories' = 'CmpCrop')) %>%
rename(Categories.CmpCrop = Categories, USGS_group.CmpCrop = USGS_group) %>%
select(value, Categories.CDL, Categories.CmpCrop, USGS_group.CmpCrop)
head(dbl_long)
value Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
2 26 Dbl Crop WinWht/Soybeans Soybeans Soybeans
3 225 Dbl Crop WinWht/Corn Winter Wheat Wheat
4 225 Dbl Crop WinWht/Corn Corn Corn
5 226 Dbl Crop Oats/Corn Oats Not_surveyed
6 226 Dbl Crop Oats/Corn Corn Corn
#using left join filters data to only CDL classes that were double cropped
by_cat_dblcrop <- dbl_long %>%
left_join(select(by_cat, -ha), by=c("USGS_group.CmpCrop" = 'crop'))
head(by_cat_dblcrop)
value Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
2 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
3 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
4 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
5 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
6 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
Year State STATE_ALPHA State_FIPS_code cat kg_app kg_ha
1 1997 Alabama AL 1 F 13394.4 0.3871593631
2 1997 Alabama AL 1 H 7420.8 0.2144950279
3 1997 Alabama AL 1 I 791.0 0.0228635143
4 1997 Arizona AZ 4 F 2.9 0.0000684443
5 1997 Arizona AZ 4 H 22341.2 0.5272854438
6 1997 Arizona AZ 4 I 13903.9 0.3281526544
ld50_ct_ha ld50_ct_tot ld50_or_ha ld50_or_tot source
1 NA NA NA NA usgs
2 NA NA NA NA usgs
3 158180098 5.472495e+12 55127026 1.907208e+12 usgs
4 NA NA NA NA usgs
5 NA NA NA NA usgs
6 207959712 8.811299e+12 47697700 2.020962e+12 usgs
#using left join filters data to only CDL classes that were double cropped
by_cmpd_dblcrop <- dbl_long %>%
left_join(select(by_compound, -ha), by=c("USGS_group.CmpCrop" = 'crop'))
head(by_cat_dblcrop)
value Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
2 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
3 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
4 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
5 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
6 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
Year State STATE_ALPHA State_FIPS_code cat kg_app kg_ha
1 1997 Alabama AL 1 F 13394.4 0.3871593631
2 1997 Alabama AL 1 H 7420.8 0.2144950279
3 1997 Alabama AL 1 I 791.0 0.0228635143
4 1997 Arizona AZ 4 F 2.9 0.0000684443
5 1997 Arizona AZ 4 H 22341.2 0.5272854438
6 1997 Arizona AZ 4 I 13903.9 0.3281526544
ld50_ct_ha ld50_ct_tot ld50_or_ha ld50_or_tot source
1 NA NA NA NA usgs
2 NA NA NA NA usgs
3 158180098 5.472495e+12 55127026 1.907208e+12 usgs
4 NA NA NA NA usgs
5 NA NA NA NA usgs
6 207959712 8.811299e+12 47697700 2.020962e+12 usgs
For all pesticide data (LD50 values, kg applied, etc.), generates one value for each double crop type included in the CDL.
# sum double crop values
by_cat_dblcrop_summed <- group_by(by_cat_dblcrop, value, Categories.CDL, Year, State,
State_FIPS_code, STATE_ALPHA, cat) %>%
summarise(kg_ha = sum(kg_ha, na.rm=T),
ld50_ct_ha = sum(ld50_ct_ha, na.rm=T),
ld50_or_ha = sum(ld50_or_ha, na.rm=T),
n = n())
head(by_cat_dblcrop_summed)
# A tibble: 6 x 11
# Groups: value, Categories.CDL, Year, State, State_FIPS_code,
# STATE_ALPHA [2]
value Categories.CDL Year State State_FIPS_code STATE_ALPHA cat
<int> <fct> <int> <fct> <int> <fct> <fct>
1 26 Dbl Crop WinW… 1997 Alab… 1 AL F
2 26 Dbl Crop WinW… 1997 Alab… 1 AL H
3 26 Dbl Crop WinW… 1997 Alab… 1 AL I
4 26 Dbl Crop WinW… 1997 Alab… 1 AL O
5 26 Dbl Crop WinW… 1997 Alab… 1 AL PGR
6 26 Dbl Crop WinW… 1997 Ariz… 4 AZ F
# … with 4 more variables: kg_ha <dbl>, ld50_ct_ha <dbl>,
# ld50_or_ha <dbl>, n <int>
# generate source column to indicate source of estimates
# (coded so least-solid source for each pair reported; and 'missing' for those w/ only one value)
by_cat_dblcrop_source <- select(by_cat_dblcrop, Categories.CmpCrop, value, Categories.CDL, Year, State,
State_FIPS_code, STATE_ALPHA, cat, source) %>%
pivot_wider(names_from = Categories.CmpCrop, values_from = source) %>%
unite("source_raw", 'Winter Wheat':Sorghum, remove = TRUE, na.rm=TRUE) %>%
mutate(source = recode(source_raw,
usgs_usgs = "usgs",
interp_interp = "interp",
interp_interp_s = "interp_s",
interp_s_interp = "interp_s",
usgs_interp = "interp",
interp_usgs = "interp",
interp_s_usgs = "interp_s",
usgs_interp_s = "interp_s",
interp_s_interp_s = "interp_s",
interp = "missing",
interp_s = "missing",
usgs = "missing")) %>%
na_if("missing") %>%
select(-source_raw)
by_cat_dblcrop_summed <- by_cat_dblcrop_summed %>%
left_join(by_cat_dblcrop_source)
Joining, by = c("value", "Categories.CDL", "Year", "State", "State_FIPS_code", "STATE_ALPHA", "cat")
table(by_cat_dblcrop_summed$n, by_cat_dblcrop_summed$source)
interp interp_s usgs
1 2 0 0 0
2 0 8005 4436 27426
# sum double crop values + join observations per row
by_cmpd_dblcrop_summed <- group_by(by_cmpd_dblcrop, value, Categories.CDL, Year, State, cmpd_usgs, State_FIPS_code, STATE_ALPHA, cat) %>%
summarise(kg_ha = sum(kg_ha, na.rm=T),
n = n())
head(by_cmpd_dblcrop_summed)
# A tibble: 6 x 10
# Groups: value, Categories.CDL, Year, State, cmpd_usgs,
# State_FIPS_code, STATE_ALPHA [6]
value Categories.CDL Year State cmpd_usgs State_FIPS_code STATE_ALPHA
<int> <fct> <int> <fct> <fct> <int> <fct>
1 26 Dbl Crop WinW… 1997 Alab… 24D 1 AL
2 26 Dbl Crop WinW… 1997 Alab… ACIFLUOR… 1 AL
3 26 Dbl Crop WinW… 1997 Alab… ALACHLOR 1 AL
4 26 Dbl Crop WinW… 1997 Alab… BENOMYL 1 AL
5 26 Dbl Crop WinW… 1997 Alab… BENTAZONE 1 AL
6 26 Dbl Crop WinW… 1997 Alab… CAPTAN 1 AL
# … with 3 more variables: cat <fct>, kg_ha <dbl>, n <int>
# filter to only insecticide category & where both double-crops present & pre-2015 (ST present)
# also reorganize to match other reclass formatting and add surveyed/not column
# rows with both double-crops present in dataset = surveyed, otherwise not
reclass <- filter(by_cat_dblcrop_summed, cat == 'I', Year < 2015) %>%
mutate(ld50_ct_ha_bil = ld50_ct_ha/10^9,
ld50_or_ha_bil = ld50_or_ha/10^9,
crop_usgs = "Double") %>%
ungroup() %>%
select(state_fips = State_FIPS_code,
state_alpha = STATE_ALPHA,
value,
Categories = Categories.CDL,
crop_usgs,
year = Year,
cat,
kg_ha,
ld50_ct_ha_bil,
ld50_or_ha_bil,
n,
source) %>%
complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
mutate(unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
kg_ha = ifelse(n==2, kg_ha, NA),
ld50_ct_ha_bil = ifelse(n==2, ld50_ct_ha_bil, NA),
ld50_or_ha_bil = ifelse(n==2, ld50_or_ha_bil, NA),
noncrop = 0) %>%
select(year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, ld50_ct_ha_bil, ld50_or_ha_bil, unsurveyed, noncrop, source) %>%
arrange(year, state_alpha, value)
head(reclass)
# A tibble: 6 x 12
year state_alpha state_fips value Categories crop_usgs kg_ha
<int> <fct> <int> <int> <fct> <chr> <dbl>
1 1997 AL 1 26 Dbl Crop … Double 0.0807
2 1997 AL 1 225 Dbl Crop … Double 0.0836
3 1997 AL 1 226 Dbl Crop … Double NA
4 1997 AL 1 230 Dbl Crop … Double 4.85
5 1997 AL 1 231 Dbl Crop … Double 9.65
6 1997 AL 1 232 Dbl Crop … Double 5.76
# … with 5 more variables: ld50_ct_ha_bil <dbl>, ld50_or_ha_bil <dbl>,
# unsurveyed <dbl>, noncrop <dbl>, source <chr>
table(reclass$year) # 47 states x 16 double crops = 752
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
752 752 752 752 752 752 752 752 752 752 752 752 752 752 752
2012 2013 2014
752 752 752
# filter to where both double-crops present
# also reorganize to match other reclass formatting and add surveyed/not column
# rows with both double-crops present in dataset = included, otherwise NA
# split into herbicides, fungicides
reclass_cat <- filter(by_cat_dblcrop_summed, cat %in% c("H", "F")) %>%
mutate(crop_usgs = "Double") %>%
ungroup() %>%
select(state_fips = State_FIPS_code,
state_alpha = STATE_ALPHA,
value,
Categories = Categories.CDL,
crop_usgs,
year = Year,
cat,
kg_ha,
n,
source)
reclass_H <- filter(reclass_cat, cat == "H") %>%
complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
mutate(n = replace_na(n, 0),
unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
kg_ha = ifelse(n==2, kg_ha, NA),
noncrop = 0,
cat = "H") %>%
select(-n) %>%
arrange(year, state_alpha, value)
reclass_F <- filter(reclass_cat, cat == "F") %>%
complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
mutate(n = replace_na(n, 0),
unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
kg_ha = ifelse(n==2, kg_ha, NA),
noncrop = 0,
cat = "F") %>%
select(-n) %>%
arrange(year, state_alpha, value)
table(reclass_H$year) # 47 states x 16 double crops = 752
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
752 752 752 752 752 752 752 752 752 752 752 752 752 752 752
2012 2013 2014 2015 2016 2017
752 752 752 752 752 752
table(reclass_F$year)
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
752 752 752 752 752 752 752 752 752 752 752 752 752 752 752
2012 2013 2014 2015 2016 2017
752 752 752 752 752 752
head(reclass_H)
# A tibble: 6 x 11
state_alpha state_fips year Categories value crop_usgs cat kg_ha
<fct> <int> <int> <fct> <int> <chr> <chr> <dbl>
1 AL 1 1997 Dbl Crop … 26 Double H 1.82
2 AL 1 1997 Dbl Crop … 225 Double H 3.91
3 AL 1 1997 Dbl Crop … 226 Double H NA
4 AL 1 1997 Dbl Crop … 230 Double H 19.3
5 AL 1 1997 Dbl Crop … 231 Double H 38.2
6 AL 1 1997 Dbl Crop … 232 Double H 23.5
# … with 3 more variables: source <chr>, unsurveyed <dbl>, noncrop <dbl>
head(reclass_F)
# A tibble: 6 x 11
state_alpha state_fips year Categories value crop_usgs cat kg_ha
<fct> <int> <int> <fct> <int> <chr> <chr> <dbl>
1 AL 1 1997 Dbl Crop … 26 Double F 0.398
2 AL 1 1997 Dbl Crop … 225 Double F 0.390
3 AL 1 1997 Dbl Crop … 226 Double F NA
4 AL 1 1997 Dbl Crop … 230 Double F 21.9
5 AL 1 1997 Dbl Crop … 231 Double F 43.0
6 AL 1 1997 Dbl Crop … 232 Double F 21.8
# … with 3 more variables: source <chr>, unsurveyed <dbl>, noncrop <dbl>
#filter to only where both double-crops are present
# also reorganize to match other reclass formatting and add surveyed/not column
reclass_cmpd <- filter(by_cmpd_dblcrop_summed, n >= 1) %>%
mutate(crop_usgs = "Double") %>%
select(state_fips = State_FIPS_code,
state_alpha = STATE_ALPHA,
Categories = Categories.CDL,
crop_usgs,
year = Year,
cat,
cmpd_usgs,
kg_ha,
n) %>%
ungroup() %>%
mutate(Categories = as.character(Categories)) %>%
complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
mutate(unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
noncrop = 0) %>%
select(cmpd_usgs, year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, unsurveyed, noncrop) %>%
arrange(cmpd_usgs, year, state_alpha, value)
Adding missing grouping variables: `value`, `State`
head(reclass_cmpd)
# A tibble: 6 x 10
cmpd_usgs year state_alpha state_fips value Categories crop_usgs kg_ha
<fct> <int> <fct> <int> <int> <chr> <chr> <dbl>
1 24D 1997 AL 1 26 Dbl Crop … Double 0.183
2 24D 1997 AL 1 225 Dbl Crop … Double 0.197
3 24D 1997 AL 1 226 Dbl Crop … Double 0.0138
4 24D 1997 AL 1 230 Dbl Crop … Double 0.183
5 24D 1997 AL 1 233 Dbl Crop … Double 0.0974
6 24D 1997 AL 1 234 Dbl Crop … Double 0.280
# … with 2 more variables: unsurveyed <dbl>, noncrop <dbl>
table(reclass_cmpd$year)
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008
37541 35554 35365 36081 35682 35628 35163 37393 36167 37791 37092 38068
2009 2010 2011 2012 2013 2014 2015 2016 2017
40884 42309 42540 43247 45493 46001 41553 40689 40163
This is the pesticide reclass table for double crops, and will be combined with the global reclass table.
#label output with today's date
date <- format(lubridate::today(), format='%Y%m%d')
filename <- paste0("./output_big/beetox_I_cdl_reclass_dblcrops_",date,".csv")
write.csv(reclass, filename, row.names=FALSE)
filename_H <- paste0("./output_big/beetox_H_cdl_reclass_dblcrops_",date,".csv")
write.csv(reclass_H, filename_H, row.names=FALSE)
filename_F <- paste0("./output_big/beetox_F_cdl_reclass_dblcrops_",date,".csv")
write.csv(reclass_F, filename_F, row.names=FALSE)
filename_cmpd <- paste0("./output_big/beetox_cmpd_cdl_reclass_dblcrops_",date,".csv")
write.csv(reclass_cmpd, filename_cmpd, row.names=FALSE)
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] tidyr_1.1.0 stringr_1.4.0 dplyr_0.8.3
loaded via a namespace (and not attached):
[1] Rcpp_1.0.6 knitr_1.23 magrittr_1.5 tidyselect_1.1.0
[5] R6_2.4.0 rlang_0.4.7 fansi_0.4.0 tools_3.6.1
[9] xfun_0.8 utf8_1.1.4 cli_1.1.0 htmltools_0.3.6
[13] ellipsis_0.2.0.1 yaml_2.2.0 assertthat_0.2.1 digest_0.6.20
[17] tibble_2.1.3 lifecycle_0.2.0 crayon_1.3.4 purrr_0.3.2
[21] vctrs_0.3.2 glue_1.3.1 evaluate_0.14 rmarkdown_1.14
[25] stringi_1.4.3 compiler_3.6.1 pillar_1.4.2 pkgconfig_2.0.2
This R Markdown site was created with workflowr