Last updated: 2021-10-20
This code generates reclassification tables to convert the CDL land use types into pesticide intensity for a particular pesticide compound.
library(tidyverse)
── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.2.0 ✔ purrr 0.3.2
✔ tibble 2.1.3 ✔ dplyr 0.8.3
✔ tidyr 1.1.0 ✔ stringr 1.4.0
✔ readr 1.3.1 ✔ forcats 0.4.0
── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
# Key template with all CDL land use categories & corresponding USGS crop categories
cdl_key <- read.csv("../keys/cdl_reclass.csv") %>%
select(value,
Categories,
crop = USGS_group) %>%
mutate(crop = as.character(crop)) %>%
filter(crop != "Double")
cdl_key_orig <- read.csv("../keys/cdl_reclass.csv") %>%
select(value,
Categories,
crop = USGS_group) %>%
mutate(crop = as.character(crop))
cdl_key_CA <- read.csv("../keys/cdl_reclass_CA.csv") %>%
select(value,
Categories,
crop = USGS_group) %>%
mutate(crop = as.character(crop)) %>%
filter(crop != "Double")
# Bee tox/pesticide data
beetox <- read.csv("../output_big/bee_tox_index_state_yr_cmpd_20200609.csv")
# Double crop data
double <- read.csv("../output_big/beetox_cmpd_cdl_reclass_dblcrops_20210601.csv")
double_CA <- read.csv("../output_big/beetox_cmpd_cdl_reclass_dblcrops_CA_20210601.csv")
# filter and summarize data for California
# generate reclass table
data_CA <- beetox %>%
filter(STATE_ALPHA == "CA") %>% #select appropriate rows + CA
select(cmpd_usgs,
state_fips = State_FIPS_code,
state_alpha = STATE_ALPHA,
year = Year,
cat,
crop,
kg_ha) %>%
ungroup() %>%
mutate(crop = as.character(crop)) %>%
full_join(cdl_key_CA, by="crop") %>%
complete(cmpd_usgs, nesting(state_alpha, state_fips), year, nesting(Categories, value, crop)) %>%
drop_na(state_alpha, year) %>%
rename(crop_usgs = crop) %>%
select(cmpd_usgs, year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha) %>%
bind_rows(double_CA) %>%
complete(cmpd_usgs, nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
drop_na(cmpd_usgs, state_alpha, year) %>%
mutate(unsurveyed = ifelse(crop_usgs=="Not_surveyed",1,0),
noncrop = ifelse(crop_usgs=="Non_crop",1,0)) %>%
#mutate(unsurveyed = ifelse(is.na(unsurveyed), 0, unsurveyed),
#noncrop = ifelse(is.na(noncrop), 0, noncrop)) %>% #fills in noncrop for missing double crops
arrange(cmpd_usgs, year, state_alpha, value)
# filter and summarize data for all states except California
# generate reclass table
# this is a pretty slow operation
data <- beetox %>%
filter(STATE_ALPHA != "CA") %>% #select appropriate rows (exclude CA)
select(cmpd_usgs,
state_fips = State_FIPS_code,
state_alpha = STATE_ALPHA,
year = Year,
cat,
crop,
kg_ha) %>%
ungroup() %>%
mutate(crop = as.character(crop)) %>%
full_join(cdl_key, by="crop") %>%
complete(cmpd_usgs, nesting(state_alpha, state_fips), year, nesting(Categories, value, crop)) %>%
drop_na(state_alpha, year) %>%
rename(crop_usgs = crop) %>%
select(cmpd_usgs, year, state_alpha, state_fips, value, Categories, crop_usgs, kg_ha) %>%
bind_rows(double) %>%
complete(cmpd_usgs, nesting(state_alpha, state_fips), year, nesting(Categories, value, crop_usgs)) %>%
drop_na(cmpd_usgs, state_alpha, year) %>%
mutate(unsurveyed = ifelse(crop_usgs=="Not_surveyed", 1,
ifelse(str_detect(Categories, "Oats"), 0.5, 0)),
noncrop = ifelse(crop_usgs=="Non_crop",1,0)) %>%
# mutate(unsurveyed = ifelse(is.na(unsurveyed), 0, unsurveyed),
#unsurveyed = ifelse(str_detect(Categories, "Oats"), 0.5, 0),
# noncrop = ifelse(is.na(noncrop), 0, noncrop)) %>% #fills in noncrop for missing double crops
arrange(cmpd_usgs, year, state_alpha, value)
# combine
data_full <- bind_rows(data, data_CA) %>% # bind rows
filter(!is.na(year) & !is.na(state_fips) & !is.na(cmpd_usgs))
# check overall structure and number of rows
summary(data_full)
cmpd_usgs state_alpha state_fips year
Length:67080384 Length:67080384 Min. : 1.00 Min. :1997
Class :character Class :character 1st Qu.:18.75 1st Qu.:2002
Mode :character Mode :character Median :30.50 Median :2007
Mean :30.19 Mean :2007
3rd Qu.:42.50 3rd Qu.:2012
Max. :56.00 Max. :2017
Categories value crop_usgs kg_ha
Length:67080384 Min. : 1.0 Length:67080384 Min. : 0
Class :character 1st Qu.: 43.0 Class :character 1st Qu.: 0
Mode :character Median : 77.0 Mode :character Median : 0
Mean :120.9 Mean : 0
3rd Qu.:218.0 3rd Qu.: 0
Max. :254.0 Max. :731
NA's :62751926
unsurveyed noncrop
Min. :0.0000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0.0000
Median :0.0000 Median :0.0000
Mean :0.2635 Mean :0.1527
3rd Qu.:1.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :1.0000
str(data_full)
Classes 'tbl_df', 'tbl' and 'data.frame': 67080384 obs. of 10 variables:
$ cmpd_usgs : chr "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" "1METHYLCYCLOPROPENE" ...
$ state_alpha: chr "AL" "AL" "AL" "AL" ...
$ state_fips : int 1 1 1 1 1 1 1 1 1 1 ...
$ year : int 1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 ...
$ Categories : chr "Corn" "Cotton" "Rice" "Sorghum" ...
$ value : int 1 2 3 4 5 6 10 11 12 13 ...
$ crop_usgs : chr "Corn" "Cotton" "Rice" "Other_crops" ...
$ kg_ha : num NA NA NA NA NA NA NA NA NA NA ...
$ unsurveyed : num 0 0 0 0 0 0 0 0 0 1 ...
$ noncrop : num 0 0 0 0 0 0 0 0 0 0 ...
head(table(data_full$year)) # 131 categories x 48 states x 508 cmpds = 3194304
1997 1998 1999 2000 2001 2002
3194304 3194304 3194304 3194304 3194304 3194304
head(table(data_full$cmpd_usgs)) # 131 x 48 states x 21 years = 132048
1METHYLCYCLOPROPENE 24D 24DB
132048 132048 132048
6BENZYLADENINE ABAMECTIN ABSCISICACID
132048 132048 132048
# check distribution of values
head(table(data_full$noncrop, data_full$crop_usgs))
Alfalfa Corn Cotton Double Non_crop Not_surveyed
0 512064 512064 512064 8193024 0 17175480
1 0 0 0 0 10241280 0
Orchards_and_grapes Other_crops Pasture_and_hay Rice Soybeans
0 7190232 4715256 2560320 512064 512064
1 0 0 0 0 0
Vegetables_and_fruit Wheat
0 12908280 1536192
1 0 0
head(table(data_full$unsurveyed, data_full$crop_usgs))
Alfalfa Corn Cotton Double Non_crop Not_surveyed
0 512064 512064 512064 7190232 10241280 0
0.5 0 0 0 1002792 0 0
1 0 0 0 0 0 17175480
Orchards_and_grapes Other_crops Pasture_and_hay Rice Soybeans
0 7190232 4715256 2560320 512064 512064
0.5 0 0 0 0 0
1 0 0 0 0 0
Vegetables_and_fruit Wheat
0 12908280 1536192
0.5 0 0
1 0 0
# check distribution of presence/absence
# total possible state-years = 48*21 = 1008
summary <- data_full %>%
filter(noncrop==0, crop_usgs!="Not_surveyed") %>%
mutate(present = ifelse(is.na(kg_ha), 0, 1),
cdl = paste(value, "_", Categories)) %>%
select(cmpd_usgs, cdl, crop_usgs, present) %>%
group_by(cmpd_usgs, cdl, crop_usgs) %>%
summarise(present = mean(present),
n = n())
## heatmap of distribution
# save pesticide classes
class_key <- beetox %>%
select(cmpd_usgs, cat) %>%
unique()
summary <- left_join(summary, class_key, by = "cmpd_usgs")
insecticides <- ggplot(filter(summary, cat=="I"), aes(x = cdl, y = cmpd_usgs)) +
geom_tile(aes(fill = present*100)) +
scale_fill_continuous(low = "white", high = "blue") +
facet_grid(. ~ crop_usgs, scales = "free_x", space = "free_x") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle("Insecticides") +
xlab("") +
ylab("") +
labs(fill = "State-years \n present (%)")
fungicides <- ggplot(filter(summary, cat=="F"), aes(x = cdl, y = cmpd_usgs)) +
geom_tile(aes(fill = present*100)) +
scale_fill_continuous(low = "white", high = "blue") +
facet_grid(. ~ crop_usgs, scales = "free_x", space = "free_x") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle("Fungicides") +
xlab("") +
ylab("") +
labs(fill = "State-years \n present (%)")
herbicides <- ggplot(filter(summary, cat=="H"), aes(x = cdl, y = cmpd_usgs)) +
geom_tile(aes(fill = present*100)) +
scale_fill_continuous(low = "white", high = "blue") +
facet_grid(. ~ crop_usgs, scales = "free_x", space = "free_x") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ggtitle("Herbicides") +
xlab("") +
ylab("") +
labs(fill = "State-years \n present (%)")
# label output with today's date
date <- format(lubridate::today(), format='%Y%m%d')
# create folder for reclass tables to live
folderpath <- paste("../output_big/beetox_cmpd_cdl_reclass",date, sep = "_")
#create output directory if it doesn't already exist
if (!dir.exists(folderpath)) {
dir.create(folderpath)
}
# split tables by compound
data_full <- ungroup(data_full)
data_list <- split(data_full, as.factor(data_full$cmpd_usgs))
nam <- ls.str(data_list)
walk2(data_list, nam, ~write_csv(.x, paste0(folderpath,"/",.y, ".csv")))
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3 purrr_0.3.2
[5] readr_1.3.1 tidyr_1.1.0 tibble_2.1.3 ggplot2_3.2.0
[9] tidyverse_1.2.1
loaded via a namespace (and not attached):
[1] Rcpp_1.0.6 cellranger_1.1.0 pillar_1.4.2 compiler_3.6.1
[5] tools_3.6.1 digest_0.6.20 lubridate_1.7.4 jsonlite_1.6
[9] evaluate_0.14 lifecycle_0.2.0 nlme_3.1-140 gtable_0.3.0
[13] lattice_0.20-38 pkgconfig_2.0.2 rlang_0.4.7 cli_1.1.0
[17] rstudioapi_0.10 yaml_2.2.0 haven_2.1.1 xfun_0.8
[21] withr_2.1.2 xml2_1.2.0 httr_1.4.2 knitr_1.23
[25] hms_0.5.0 generics_0.0.2 vctrs_0.3.2 grid_3.6.1
[29] tidyselect_1.1.0 glue_1.3.1 R6_2.4.0 readxl_1.3.1
[33] rmarkdown_1.14 modelr_0.1.4 magrittr_1.5 ellipsis_0.2.0.1
[37] backports_1.1.4 scales_1.0.0 htmltools_0.3.6 rvest_0.3.4
[41] assertthat_0.2.1 colorspace_1.4-1 stringi_1.4.3 lazyeval_0.2.2
[45] munsell_0.5.0 broom_0.5.2 crayon_1.3.4
This R Markdown site was created with workflowr