Last updated: 2021-10-19
This code adjusts the pesticide use indices to account for scenarios of double cropping (sequential planting of two crops in the same field within a growing season) for California only. In double-cropped fields, we assume the total pesticide use (both application rate and units of honey bee toxicity per acre) is a sum of average pesticide use on the component crops.
library(dplyr); library(stringr); library(tidyr)
The required datasets for this code are created by the script that calculates the bee toxicity index for state-year-crop combinations.
#load pesticide data grouped by category (insecticide, herbicide, etc)
by_cat <- read.csv("../output_big/bee_tox_index_state_yr_cat_20210625.csv") %>%
filter(STATE_ALPHA=="CA")
by_compound <- read.csv("../output_big/bee_tox_index_state_yr_cmpd_20200609.csv") %>%
filter(STATE_ALPHA=="CA")
#load crop data layer reclass table
#'USGS group' column matches 'crop' in the pesticide table
cdl_all <- read.csv("../keys/cdl_reclass_CA.csv")
head(cdl_all)
value Categories USGS_group
1 1 Corn Corn
2 2 Cotton Cotton
3 3 Rice Rice
4 4 Sorghum Other_crops
5 5 Soybeans Soybeans
6 6 Sunflower Other_crops
dbl_crops <- filter(cdl_all, str_detect(Categories, pattern='Dbl Crop')) %>%
mutate(Categories_tmp = gsub(Categories, pattern="Dbl Crop", replacement="")) %>%
separate(col=Categories_tmp, into= c("Crop1", "Crop2"), sep ="/") %>%
select(value, Categories, Crop1, Crop2, everything()) %>%
mutate(Crop1 = trimws(Crop1), Crop2=trimws(Crop2))
#store names of double crops that are not in larger cdl dataset (altered name)
tofix_crop1 <- unique(dbl_crops$Crop1[!dbl_crops$Crop1 %in% cdl_all$Categories])
tofix_crop2 <- unique(dbl_crops$Crop2[!dbl_crops$Crop2 %in% cdl_all$Categories])
#correct crop names to match larger CDL table
dbl_crops <- mutate(dbl_crops, Crop1 = if_else(Crop1 == 'WinWht', 'Winter Wheat',
if_else(Crop1 == 'Durum Wht', "Durum Wheat", Crop1)),
Crop2 = if_else(Crop2 == 'Cantaloupe', 'Cantaloupes',
if_else(Crop2 == 'Durum Wht', "Durum Wheat", Crop2))
)
#check that all crop names (including double crops) exist in larger CDL dataset (value of 0 here means all names are correct)
which(!c(dbl_crops$Crop1, dbl_crops$Crop2) %in% cdl_all$Categories)
integer(0)
head(dbl_crops)
value Categories Crop1 Crop2 USGS_group
1 26 Dbl Crop WinWht/Soybeans Winter Wheat Soybeans Double
2 225 Dbl Crop WinWht/Corn Winter Wheat Corn Double
3 226 Dbl Crop Oats/Corn Oats Corn Double
4 230 Dbl Crop Lettuce/Durum Wht Lettuce Durum Wheat Double
5 231 Dbl Crop Lettuce/Cantaloupe Lettuce Cantaloupes Double
6 232 Dbl Crop Lettuce/Cotton Lettuce Cotton Double
Convert to ‘long’ format to combine double crop classes with USGS group.
dbl_long <- select(dbl_crops, value, Categories, Crop1, Crop2) %>%
rename(Categories.CDL = Categories) %>%
tidyr::pivot_longer(-value & -Categories.CDL,
values_to='CmpCrop', names_to='remove') %>%
select(-remove)
#match new long format data to appropriate USGS group
dbl_long <- select(cdl_all, Categories, USGS_group) %>%
right_join(dbl_long,by=c('Categories' = 'CmpCrop')) %>%
rename(Categories.CmpCrop = Categories, USGS_group.CmpCrop = USGS_group) %>%
select(value, Categories.CDL, Categories.CmpCrop, USGS_group.CmpCrop)
head(dbl_long)
value Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
2 26 Dbl Crop WinWht/Soybeans Soybeans Soybeans
3 225 Dbl Crop WinWht/Corn Winter Wheat Wheat
4 225 Dbl Crop WinWht/Corn Corn Corn
5 226 Dbl Crop Oats/Corn Oats Other_crops
6 226 Dbl Crop Oats/Corn Corn Corn
#using left join filters data to only CDL classes that were double cropped
by_cat_dblcrop <- dbl_long %>%
left_join(select(by_cat, -ha), by=c("USGS_group.CmpCrop" = 'crop'))
head(by_cat_dblcrop)
value Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
2 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
3 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
4 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
5 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
6 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
Year State STATE_ALPHA State_FIPS_code cat kg_app kg_ha
1 1997 California CA 6 F 2270.0 9.643054e-03
2 1997 California CA 6 FUM 1890.0 8.028798e-03
3 1997 California CA 6 H 205022.4 8.709436e-01
4 1997 California CA 6 I 22819.1 9.693648e-02
5 1997 California CA 6 O 1085.5 4.611249e-03
6 1997 California CA 6 PGR 16.7 7.094229e-05
ld50_ct_ha ld50_ct_tot ld50_or_ha ld50_or_tot source
1 NA NA NA NA usgs
2 NA NA NA NA usgs
3 NA NA NA NA usgs
4 313302864 7.375231e+13 419715080 9.880202e+13 usgs
5 NA NA NA NA usgs
6 NA NA NA NA usgs
#using left join filters data to only CDL classes that were double cropped
by_cmpd_dblcrop <- dbl_long %>%
left_join(select(by_compound, -ha), by=c("USGS_group.CmpCrop" = 'crop'))
head(by_cat_dblcrop)
value Categories.CDL Categories.CmpCrop USGS_group.CmpCrop
1 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
2 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
3 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
4 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
5 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
6 26 Dbl Crop WinWht/Soybeans Winter Wheat Wheat
Year State STATE_ALPHA State_FIPS_code cat kg_app kg_ha
1 1997 California CA 6 F 2270.0 9.643054e-03
2 1997 California CA 6 FUM 1890.0 8.028798e-03
3 1997 California CA 6 H 205022.4 8.709436e-01
4 1997 California CA 6 I 22819.1 9.693648e-02
5 1997 California CA 6 O 1085.5 4.611249e-03
6 1997 California CA 6 PGR 16.7 7.094229e-05
ld50_ct_ha ld50_ct_tot ld50_or_ha ld50_or_tot source
1 NA NA NA NA usgs
2 NA NA NA NA usgs
3 NA NA NA NA usgs
4 313302864 7.375231e+13 419715080 9.880202e+13 usgs
5 NA NA NA NA usgs
6 NA NA NA NA usgs
For all pesticide data (LD50 values, kg applied, etc.), generates one value for each double crop type included in the CDL.
# sum double crop values
by_cat_dblcrop_summed <- group_by(by_cat_dblcrop, value, Categories.CDL, Year, State,
State_FIPS_code, STATE_ALPHA, cat) %>%
summarise(kg_ha = sum(kg_ha, na.rm=T),
ld50_ct_ha = sum(ld50_ct_ha, na.rm=T),
ld50_or_ha = sum(ld50_or_ha, na.rm=T),
n = n())
head(by_cat_dblcrop_summed)
# A tibble: 6 x 11
# Groups: value, Categories.CDL, Year, State, State_FIPS_code,
# STATE_ALPHA [1]
value Categories.CDL Year State State_FIPS_code STATE_ALPHA cat
<int> <fct> <int> <fct> <int> <fct> <fct>
1 26 Dbl Crop WinW… 1997 Cali… 6 CA F
2 26 Dbl Crop WinW… 1997 Cali… 6 CA FUM
3 26 Dbl Crop WinW… 1997 Cali… 6 CA H
4 26 Dbl Crop WinW… 1997 Cali… 6 CA I
5 26 Dbl Crop WinW… 1997 Cali… 6 CA O
6 26 Dbl Crop WinW… 1997 Cali… 6 CA PGR
# … with 4 more variables: kg_ha <dbl>, ld50_ct_ha <dbl>,
# ld50_or_ha <dbl>, n <int>
# generate source column to indicate source of estimates
# (coded so least-solid source for each pair reported; and 'missing' for those w/ only one value)
by_cat_dblcrop_source <- select(by_cat_dblcrop, Categories.CmpCrop, value, Categories.CDL, Year, State,
State_FIPS_code, STATE_ALPHA, cat, source) %>%
pivot_wider(names_from = Categories.CmpCrop, values_from = source) %>%
unite("source_raw", 'Winter Wheat':Sorghum, remove = TRUE, na.rm=TRUE) %>%
mutate(source = recode(source_raw,
usgs_usgs = "usgs",
interp_interp = "interp",
interp_interp_s = "interp_s",
interp_s_interp = "interp_s",
usgs_interp = "interp",
interp_usgs = "interp",
interp_s_usgs = "interp_s",
usgs_interp_s = "interp_s",
interp_s_interp_s = "interp_s",
interp = "missing",
interp_s = "missing",
usgs = "missing")) %>%
na_if("missing") %>%
select(-source_raw)
by_cat_dblcrop_summed <- by_cat_dblcrop_summed %>%
left_join(by_cat_dblcrop_source)
Joining, by = c("value", "Categories.CDL", "Year", "State", "State_FIPS_code", "STATE_ALPHA", "cat")
table(by_cat_dblcrop_summed$n, by_cat_dblcrop_summed$source)
interp usgs
1 5 0 0
2 0 122 1231
# sum double crop values + join observations per row
by_cmpd_dblcrop_summed <- group_by(by_cmpd_dblcrop, value, Categories.CDL, Year, State, cmpd_usgs,State_FIPS_code, STATE_ALPHA, cat) %>%
summarise(kg_ha = sum(kg_ha, na.rm=T),
n = n())
head(by_cmpd_dblcrop_summed)
# A tibble: 6 x 10
# Groups: value, Categories.CDL, Year, State, cmpd_usgs,
# State_FIPS_code, STATE_ALPHA [6]
value Categories.CDL Year State cmpd_usgs State_FIPS_code STATE_ALPHA
<int> <fct> <int> <fct> <fct> <int> <fct>
1 26 Dbl Crop WinW… 1997 Cali… 24D 6 CA
2 26 Dbl Crop WinW… 1997 Cali… ALUMINUM… 6 CA
3 26 Dbl Crop WinW… 1997 Cali… BARBAN 6 CA
4 26 Dbl Crop WinW… 1997 Cali… BROMOXYN… 6 CA
5 26 Dbl Crop WinW… 1997 Cali… CALCIUMC… 6 CA
6 26 Dbl Crop WinW… 1997 Cali… CAPTAN 6 CA
# … with 3 more variables: cat <fct>, kg_ha <dbl>, n <int>
# filter to only insecticide category & where both double-crops present
# also reorganize to match other reclass formatting and add surveyed/not column (all double crops surveyed in CA)
# rows with both double-crops present in dataset = surveyed, otherwise not
reclass <- filter(by_cat_dblcrop_summed, cat == 'I') %>%
mutate(ld50_ct_ha_bil = ld50_ct_ha/10^9,
ld50_or_ha_bil = ld50_or_ha/10^9,
crop_usgs = "Double") %>%
select(state_fips = State_FIPS_code,
state_alpha = STATE_ALPHA,
Categories = Categories.CDL,
crop_usgs,
year = Year,
cat,
kg_ha,
ld50_ct_ha_bil,
ld50_or_ha_bil,
n,
source) %>%
ungroup() %>%
complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
mutate(unsurveyed = 0,
kg_ha = ifelse(n==2, kg_ha, NA),
ld50_ct_ha_bil = ifelse(n==2, ld50_ct_ha_bil, NA),
ld50_or_ha_bil = ifelse(n==2, ld50_or_ha_bil, NA),
noncrop = 0) %>%
select(year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, ld50_ct_ha_bil, ld50_or_ha_bil, unsurveyed, noncrop, source) %>%
arrange(year, state_alpha, value)
Adding missing grouping variables: `value`, `State`
head(reclass)
# A tibble: 6 x 12
year state_alpha state_fips value Categories crop_usgs kg_ha
<int> <fct> <int> <int> <fct> <chr> <dbl>
1 1997 CA 6 26 Dbl Crop … Double NA
2 1997 CA 6 225 Dbl Crop … Double 1.48
3 1997 CA 6 226 Dbl Crop … Double 2.24
4 1997 CA 6 230 Dbl Crop … Double 2.43
5 1997 CA 6 231 Dbl Crop … Double 4.66
6 1997 CA 6 232 Dbl Crop … Double 5.95
# … with 5 more variables: ld50_ct_ha_bil <dbl>, ld50_or_ha_bil <dbl>,
# unsurveyed <dbl>, noncrop <dbl>, source <chr>
table(reclass$year)
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
16 16 16 16 16 16 16 16 16 16 16 16 16 16 16
2012 2013 2014
16 16 16
# also reorganize to match other reclass formatting and add surveyed/not column (all double crops surveyed in CA)
# rows with both double-crops present in dataset = included, otherwise NA
# split into herbicides, fungicides
reclass_cat <- filter(by_cat_dblcrop_summed, cat %in% c("H", "F")) %>%
mutate(crop_usgs = "Double") %>%
ungroup() %>%
select(state_fips = State_FIPS_code,
state_alpha = STATE_ALPHA,
value,
Categories = Categories.CDL,
crop_usgs,
year = Year,
cat,
kg_ha,
n,
source)
reclass_H <- filter(reclass_cat, cat == "H") %>%
complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
mutate(n = replace_na(n, 0),
unsurveyed = 0,
kg_ha = ifelse(n==2, kg_ha, NA),
noncrop = 0,
cat = "H") %>%
select(-n) %>%
arrange(year, state_alpha, value)
reclass_F <- filter(reclass_cat, cat == "F") %>%
complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
mutate(n = replace_na(n, 0),
unsurveyed = 0,
kg_ha = ifelse(n==2, kg_ha, NA),
noncrop = 0,
cat = "F") %>%
select(-n) %>%
arrange(year, state_alpha, value)
table(reclass_H$year)
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
16 16 16 16 16 16 16 16 16 16 16 16 16 16 16
2012 2013 2014 2015 2016 2017
16 16 16 16 16 16
table(reclass_F$year)
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
16 16 16 16 16 16 16 16 16 16 16 16 16 16 16
2012 2013 2014 2015 2016 2017
16 16 16 16 16 16
# filter to only where at least one double-crops present
# also reorganize to match other reclass formatting and add surveyed/not column
reclass_cmpd <- filter(by_cmpd_dblcrop_summed, n >= 1) %>%
mutate(crop_usgs = "Double") %>%
select(state_fips = State_FIPS_code,
state_alpha = STATE_ALPHA,
Categories = Categories.CDL,
crop_usgs,
year = Year,
cat,
cmpd_usgs,
kg_ha,
n) %>%
ungroup() %>%
mutate(Categories = as.character(Categories)) %>%
complete(nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
mutate(unsurveyed = 0,
noncrop = 0) %>%
select(cmpd_usgs, year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha, unsurveyed, noncrop) %>%
arrange(cmpd_usgs, year, state_alpha, value)
Warning: Factor `State` contains implicit NA, consider using
`forcats::fct_explicit_na`
Warning: Factor `cmpd_usgs` contains implicit NA, consider using
`forcats::fct_explicit_na`
Warning: Factor `STATE_ALPHA` contains implicit NA, consider using
`forcats::fct_explicit_na`
Adding missing grouping variables: `value`, `State`
head(reclass_cmpd)
# A tibble: 6 x 10
cmpd_usgs year state_alpha state_fips value Categories crop_usgs kg_ha
<fct> <int> <fct> <int> <int> <chr> <chr> <dbl>
1 24D 1997 CA 6 26 Dbl Crop … Double 0.288
2 24D 1997 CA 6 225 Dbl Crop … Double 0.341
3 24D 1997 CA 6 226 Dbl Crop … Double 0.318
4 24D 1997 CA 6 230 Dbl Crop … Double 0.294
5 24D 1997 CA 6 231 Dbl Crop … Double 0.0122
6 24D 1997 CA 6 232 Dbl Crop … Double 0.00611
# … with 2 more variables: unsurveyed <dbl>, noncrop <dbl>
table(reclass_cmpd$year)
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
1863 2419 2349 2324 2166 2217 2199 2025 2077 2065 2205 2290 2404 2391 2464
2012 2013 2014 2015 2016 2017
2464 2598 2584 2620 2673 2609
This is the pesticide reclass table for double crops, and will be combined with the global reclass table.
#label output with today's date
date <- format(lubridate::today(), format='%Y%m%d')
filename <- paste0("./output_big/beetox_I_cdl_reclass_dblcrops_CA_",date,".csv")
write.csv(reclass, filename, row.names=FALSE)
filename_H <- paste0("./output_big/beetox_H_cdl_reclass_dblcrops_CA_",date,".csv")
write.csv(reclass_H, filename_H, row.names=FALSE)
filename_F <- paste0("./output_big/beetox_F_cdl_reclass_dblcrops_CA_",date,".csv")
write.csv(reclass_F, filename_F, row.names=FALSE)
filename_cmpd <- paste0("./output_big/beetox_cmpd_cdl_reclass_dblcrops_CA_",date,".csv")
write.csv(reclass_cmpd, filename_cmpd, row.names=FALSE)
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] tidyr_1.1.0 stringr_1.4.0 dplyr_0.8.3
loaded via a namespace (and not attached):
[1] Rcpp_1.0.6 knitr_1.23 magrittr_1.5 tidyselect_1.1.0
[5] R6_2.4.0 rlang_0.4.7 fansi_0.4.0 tools_3.6.1
[9] xfun_0.8 utf8_1.1.4 cli_1.1.0 htmltools_0.3.6
[13] ellipsis_0.2.0.1 yaml_2.2.0 assertthat_0.2.1 digest_0.6.20
[17] tibble_2.1.3 lifecycle_0.2.0 crayon_1.3.4 purrr_0.3.2
[21] vctrs_0.3.2 glue_1.3.1 evaluate_0.14 rmarkdown_1.14
[25] stringi_1.4.3 compiler_3.6.1 pillar_1.4.2 pkgconfig_2.0.2
This R Markdown site was created with workflowr