Validation - land cover summary

Purpose

This code summarizes coverage of pesticide use estimates by extent of agricultural land and surveyed crops by combinations of states and years.

Libraries and functions

library(tidyverse)

Load data

# load land cover data
lc <- read.csv("../output_big/land-cover-summary.csv") %>%
  filter(state != "CA")

lc_ca <- read.csv("../output_big/land-cover-summary.csv") %>%
  filter(state == "CA")

# load state region key
reg <- read.csv("../keys/usda_farm_regions_state.csv")

# load cdl reclass key (all states but CA)
reclass <- read.csv("../keys/cdl_reclass.csv") %>%
  dplyr::select(value, Categories, USGS_group)

reclass_ca <- read.csv("../keys/cdl_reclass_CA.csv") %>%
  dplyr::select(value, Categories, USGS_group)

# reclass keys with pesticide data
ins_key <- read.csv("../output_big/beetox_I_cdl_reclass.20220325.csv") %>%
  select(yr = year, 
         state = state_alpha, 
         Categories, 
         kg_ha_I = kg_ha, 
         ld50_ct_ha_bil, 
         ld50_or_ha_bil, 
         source_I = source)

herb_key <- read.csv("../output_big/beetox_H_cdl_reclass.20210625.csv") %>%
    select(yr = year, 
         state = state_alpha, 
         Categories, 
         kg_ha_H = kg_ha, 
         source_H = source)

fun_key <- read.csv("../output_big/beetox_F_cdl_reclass.20210625.csv") %>%
    select(yr = year, 
         state = state_alpha, 
         Categories, 
         kg_ha_F = kg_ha, 
         source_F = source)

Join data

joined <- lc %>%
  left_join(reg, by = "state") %>%
  left_join(reclass, by = "value") %>%
  na.omit() %>%
  mutate(lc_cat = ifelse(USGS_group == "Not_surveyed", "Not_surveyed",
         ifelse(USGS_group == "Non_crop", "Non_crop", "Surveyed_agland")))

joined_ca <- lc_ca %>%
  left_join(reg, by = "state") %>%
  left_join(reclass_ca, by = "value") %>%
  na.omit() %>%
  mutate(lc_cat = ifelse(USGS_group == "Not_surveyed", "Not_surveyed",
         ifelse(USGS_group == "Non_crop", "Non_crop", "Surveyed_agland")))

# store vector of crops with crop-specific pesticide estimates
sp_crops <- c("Corn","Cotton","Soybeans","Alfalfa","Wheat","Rice")

joined_full <- rbind(joined, joined_ca) %>%
  mutate(crop_specific = ifelse(USGS_group %in% sp_crops, "yes", "no"))

joined_area <- joined_full %>%
  left_join(ins_key, by = c('state', 'yr', 'Categories')) %>%
  left_join(herb_key, by = c('state', 'yr', 'Categories')) %>%
  left_join(fun_key, by = c('state', 'yr', 'Categories')) %>%
  mutate(crop_specific = ifelse(USGS_group %in% sp_crops, "yes", "no"))

Summarize data

Surveyed area relative to ag and total area

# summarize pixels in each region, state, and year
lc_tot <- joined_full %>%
  group_by(region, state, yr) %>%
  summarise(pixels_tot = sum(count)) %>%
  ungroup()

## `summarise()` has grouped output by 'region', 'state'. You can override using the `.groups` argument.

# summarize pixels in agricultural land (both surveyed and not)
lc_ag <- joined_full %>%
  filter(lc_cat %in% c("Surveyed_agland", "Not_surveyed")) %>%
  group_by(region, state, yr) %>%
  summarise(pixels_ag = sum(count)) %>%
  ungroup()

## `summarise()` has grouped output by 'region', 'state'. You can override using the `.groups` argument.

# summarize pixels in double crops
lc_dbl <- joined_full %>%
  filter(USGS_group == "Double") %>%
  group_by(region, state, yr) %>%
  summarise(pixels_dbl = sum(count)) %>%
  ungroup()

## `summarise()` has grouped output by 'region', 'state'. You can override using the `.groups` argument.

# summarize pixels in crops (exclude pasture)
lc_crop <- joined_full %>%
  filter(lc_cat %in% c("Surveyed_agland", "Not_surveyed") & USGS_group != "Pasture_and_hay") %>%
  group_by(region, state, yr) %>%
  summarise(pixels_crop = sum(count)) %>%
  ungroup()

## `summarise()` has grouped output by 'region', 'state'. You can override using the `.groups` argument.

# summarize surveyed pixels that are crop-specific
lc_sp <- joined_full %>%
  filter(lc_cat %in% c("Surveyed_agland") & crop_specific == "yes") %>%
  group_by(region, state, yr) %>%
  summarise(pixels_sp = sum(count)) %>%
  ungroup()

## `summarise()` has grouped output by 'region', 'state'. You can override using the `.groups` argument.

# join all the summary values together and calculate percentages
lc_sum <- joined_full %>%
  group_by(region, state, yr, lc_cat) %>%
  summarise(pixels = sum(count)) %>%
  left_join(lc_tot, by = c("region", "state", "yr")) %>%
  left_join(lc_ag, by = c("region", "state", "yr")) %>%
  left_join(lc_dbl, by = c("region", "state", "yr")) %>%
  left_join(lc_crop, by = c("region", "state", "yr")) %>%
  left_join(lc_sp, by = c("region", "state", "yr")) %>%
  unite("reg_state", region, state, sep = "_", remove = FALSE) %>%
  mutate(tot_perc = (pixels/pixels_tot)*100, # each category (non crop, surveyed, unsurveyed) as percent of total land area
         ag_perc = (pixels/pixels_ag)*100, # each category as percent of agricultural land
         dbl_perc = (pixels_dbl/pixels_ag)*100, # double crops as percent of agricultural land
         sp_perc = (pixels_sp/pixels_crop)*100) %>% # crop-specific as a percent of agricultural land
  ungroup()

## `summarise()` has grouped output by 'region', 'state', 'yr'. You can override using the `.groups` argument.

# turn long for graphing - and filter to surveyed agland
lc_reshaped <- lc_sum %>%
  filter(lc_cat == "Surveyed_agland") %>%
  select(reg_state, region, state, yr, tot_perc, ag_perc, sp_perc) %>%
  pivot_longer(cols = tot_perc:sp_perc,
               names_to = "indicator",
               values_to = "percent")

Combined figure

lc_reshaped$indicator <- factor(lc_reshaped$indicator,
                                levels = c("tot_perc",
                                           "ag_perc",
                                           "sp_perc"),
                                labels = c("Total land area\n surveyed (%)",
                                           "Agricultural area\n surveyed (%)",
                                           "Crop-specific estimate\n (% cropland, excl. pasture)"))
lc_reshaped$region <- factor(lc_reshaped$region,
                             labels = c("Appalachia", "Corn Belt", "Delta", "Lake", "Mountain",
                                        "Northeast", "Northern\n Plains", "Pacific", "Southeast", "Southern\n Plains"))

ggplot(data = lc_reshaped, aes(x = state, y = percent, fill = indicator)) +
  geom_boxplot() +
  facet_grid(region ~ indicator, scales = "free", space = "free", switch = "y",
             as.table = TRUE, labeller = label_value) +
  coord_flip() +
  #theme_classic() +
  theme(panel.grid.major.x = element_line(colour = "white", linetype = 3),
        panel.grid.minor.x = element_blank(),
        panel.border = element_rect(color = "black", fill = NA, size = 1),
    legend.position = "none", strip.placement = "outside",
    panel.grid.minor.y = element_blank(), 
    strip.background = element_rect(fill=NA),
    strip.text.x = element_text(size = 11),
    axis.text.x = element_text(size = 10),
    strip.text.y.left = element_text(size = 11, angle = 0, vjust = 0.5),
    axis.text.y = element_text(size = 10)) +
  ylab("") +
  ylim(c(0,100)) +
  xlab("")

Save figure for publication

pdf(file = "../docs/validation_land-cover-summary_files/figure-html/fig4.pdf",   
    width = 8, 
    height = 12)

ggplot(data = lc_reshaped, aes(x = state, y = percent, fill = indicator)) +
  geom_boxplot() +
  facet_grid(region ~ indicator, scales = "free", space = "free", switch = "y",
             as.table = TRUE, labeller = label_value) +
  coord_flip() +
  #theme_classic() +
  theme(panel.grid.major.x = element_line(colour = "white", linetype = 3),
        panel.grid.minor.x = element_blank(),
        panel.border = element_rect(color = "black", fill = NA, size = 1),
    legend.position = "none", strip.placement = "outside",
    panel.grid.minor.y = element_blank(), 
    strip.background = element_rect(fill=NA),
    strip.text.x = element_text(size = 11),
    axis.text.x = element_text(size = 10),
    strip.text.y.left = element_text(size = 11, angle = 0, vjust = 0.5),
    axis.text.y = element_text(size = 10)) +
  ylab("") +
  ylim(c(0,100)) +
  xlab("") 

dev.off()

## quartz_off_screen 
##                 2

Extent of double-crops

dbl <- filter(lc_sum, lc_cat == "Surveyed_agland")

ggplot(data = dbl, aes(x = reorder(state, -dbl_perc), y = dbl_perc, fill = region)) +
  geom_boxplot() +
  coord_flip() +
  theme_classic() +
  theme(panel.grid.major.x = element_line(colour = "gray", linetype = 3)) +
  ylab("Agricultural area in double crops (%)") +
  xlab("")

Describe unsurveyed crops

unsurv <- joined_full %>%
  filter(lc_cat == "Not_surveyed") %>%
  group_by(region, state, value, Categories) %>%
  summarise(count = median(count)) %>%
  arrange(state, -count)

## `summarise()` has grouped output by 'region', 'state', 'value'. You can override using the `.groups` argument.

unsurv_sum <- unsurv %>%
  group_by(Categories) %>%
  summarise(count = sum(count))

This R Markdown site was created with workflowr

Validation - land cover summary

Maggie Douglas

Purpose

Libraries and functions

Load data

Join data

Summarize data

Surveyed area relative to ag and total area

Combined figure

Save figure for publication

Extent of double-crops

Describe unsurveyed crops