Last updated: 2019-07-24
This code is intended to extract and store data on US crop acreage from the US Agricultural Census and USDA’s County Agricultural Production Survey. The census is more comprehensive but is conducted only every 5 years. The annual survey is more frequent but has less comprehensive geographic coverage.
Note: These are large data files (1-6 GB) and this code takes time to run.
The datasets for both programs were downloaded directly from the ‘back side’ of USDA’s Quick Stats database. The files can be accessed by going to the Quick Stats developer page and clicking the link toward the bottom to download the files. Each file was appended with the download date in the format YYYYMMDD
.
library(data.table)
library(tidyverse)
qs.economics <- fread("../data_big/nass_survey/qs.economics_20170519.txt")
str(qs.economics)
# Extract acreage data
# domain_desc=TOTAL excludes acreage data broken down by operation size, etc.
# commodity_desc = AG LAND selects only data items having to do with land area
# short_desc = "LAND AREA - INCL, NON-AG - ACRES" selects data items for total land area
qs.economics.ac<-subset(qs.economics,
UNIT_DESC=="ACRES" &
DOMAIN_DESC=="TOTAL" &
(COMMODITY_DESC=="AG LAND"|SHORT_DESC=="LAND AREA, INCL NON-AG - ACRES"))
str(qs.economics.ac)
# Subset county data
qs.economics.ac.cty<-filter(qs.economics.ac, AGG_LEVEL_DESC=="COUNTY")
write.csv(qs.economics.ac.cty, "../output_big/nass_survey/qs.economics.ac.cty_20170519.csv")
qs.environment <- fread("../data_big/nass_survey/qs.environmental_20170519.txt")
str(qs.environment)
Classes 'data.table' and 'data.frame': 1001914 obs. of 39 variables:
$ SOURCE_DESC : chr "CENSUS" "CENSUS" "CENSUS" "CENSUS" ...
$ SECTOR_DESC : chr "ENVIRONMENTAL" "ENVIRONMENTAL" "ENVIRONMENTAL" "ENVIRONMENTAL" ...
$ GROUP_DESC : chr "FIELD CROPS" "FIELD CROPS" "FIELD CROPS" "FIELD CROPS" ...
$ COMMODITY_DESC : chr "SORGHUM" "WHEAT" "WHEAT" "COTTON" ...
$ CLASS_DESC : chr "ALL CLASSES" "ALL CLASSES" "ALL CLASSES" "ALL CLASSES" ...
$ PRODN_PRACTICE_DESC : chr "IRRIGATED, CHEMIGATION" "IRRIGATED, CHEMIGATION" "IRRIGATED, CHEMIGATION" "IRRIGATED, CHEMIGATION" ...
$ UTIL_PRACTICE_DESC : chr "GRAIN" "ALL UTILIZATION PRACTICES" "ALL UTILIZATION PRACTICES" "ALL UTILIZATION PRACTICES" ...
$ STATISTICCAT_DESC : chr "TREATED" "TREATED" "TREATED" "TREATED" ...
$ UNIT_DESC : chr "OPERATIONS" "OPERATIONS" "ACRES" "OPERATIONS" ...
$ SHORT_DESC : chr "SORGHUM, GRAIN, IRRIGATED, CHEMIGATION - OPERATIONS WITH TREATED" "WHEAT, IRRIGATED, CHEMIGATION - OPERATIONS WITH TREATED" "WHEAT, IRRIGATED, CHEMIGATION - TREATED, MEASURED IN ACRES" "COTTON, IRRIGATED, CHEMIGATION - OPERATIONS WITH TREATED" ...
$ DOMAIN_DESC : chr "CHEMICAL" "FERTILIZER" "FERTILIZER" "FERTILIZER" ...
$ DOMAINCAT_DESC : chr "CHEMICAL: (TOTAL)" "FERTILIZER: (TOTAL)" "FERTILIZER: (TOTAL)" "FERTILIZER: (TOTAL)" ...
$ AGG_LEVEL_DESC : chr "NATIONAL" "NATIONAL" "NATIONAL" "NATIONAL" ...
$ STATE_ANSI : int NA NA NA NA NA NA NA 1 1 1 ...
$ STATE_FIPS_CODE : int 99 99 99 99 99 99 99 1 1 1 ...
$ STATE_ALPHA : chr "US" "US" "US" "US" ...
$ STATE_NAME : chr "US TOTAL" "US TOTAL" "US TOTAL" "US TOTAL" ...
$ ASD_CODE : int NA NA NA NA NA NA NA NA NA NA ...
$ ASD_DESC : chr "" "" "" "" ...
$ COUNTY_ANSI : int NA NA NA NA NA NA NA NA NA NA ...
$ COUNTY_CODE : int NA NA NA NA NA NA NA NA NA NA ...
$ COUNTY_NAME : chr "" "" "" "" ...
$ REGION_DESC : chr "" "" "" "" ...
$ ZIP_5 : logi NA NA NA NA NA NA ...
$ WATERSHED_CODE : int 0 0 0 0 0 0 0 0 0 0 ...
$ WATERSHED_DESC : chr "" "" "" "" ...
$ CONGR_DISTRICT_CODE : logi NA NA NA NA NA NA ...
$ COUNTRY_CODE : int 9000 9000 9000 9000 9000 9000 9000 9000 9000 9000 ...
$ COUNTRY_NAME : chr "UNITED STATES" "UNITED STATES" "UNITED STATES" "UNITED STATES" ...
$ LOCATION_DESC : chr "US TOTAL" "US TOTAL" "US TOTAL" "US TOTAL" ...
$ YEAR : int 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
$ FREQ_DESC : chr "ANNUAL" "ANNUAL" "ANNUAL" "ANNUAL" ...
$ BEGIN_CODE : int 0 0 0 0 0 0 0 0 0 0 ...
$ END_CODE : int 0 0 0 0 0 0 0 0 0 0 ...
$ REFERENCE_PERIOD_DESC: chr "YEAR" "YEAR" "YEAR" "YEAR" ...
$ WEEK_ENDING : logi NA NA NA NA NA NA ...
$ LOAD_TIME : chr "2012-12-31 00:00:00" "2012-12-31 00:00:00" "2012-12-31 00:00:00" "2012-12-31 00:00:00" ...
$ VALUE : chr "80" "2,290" "732,345" "1,663" ...
$ CV_% : chr "" "" "" "" ...
- attr(*, ".internal.selfref")=<externalptr>
# domain_desc=TOTAL excludes acreage data broken down by operation size, etc.
# commodity_desc=AG LAND selects data items having to do with treatments of total ag land
# statisticcat_desc = TREATED selects only data items having to do with chemical treatments
qs.environment.trt <- subset(qs.environment,
UNIT_DESC=="ACRES" &
COMMODITY_DESC == "AG LAND" &
STATISTICCAT_DESC=="TREATED" &
SHORT_DESC=="AG LAND - TREATED, MEASURED IN ACRES")
str(qs.environment.trt)
# Subset county data
qs.environment.trt.cty<-filter(qs.environment.trt, AGG_LEVEL_DESC=="COUNTY")
write.csv(qs.environment.trt.cty, "../output_big/nass_survey/qs.environment.trt.cty_20170519.csv")
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
[4] purrr_0.3.2 readr_1.3.1 tidyr_0.8.3
[7] tibble_2.1.3 ggplot2_3.2.0 tidyverse_1.2.1
[10] data.table_1.12.2
loaded via a namespace (and not attached):
[1] Rcpp_1.0.1 cellranger_1.1.0 pillar_1.4.2 compiler_3.6.1
[5] tools_3.6.1 zeallot_0.1.0 digest_0.6.20 lubridate_1.7.4
[9] jsonlite_1.6 evaluate_0.14 nlme_3.1-140 gtable_0.3.0
[13] lattice_0.20-38 pkgconfig_2.0.2 rlang_0.4.0 cli_1.1.0
[17] rstudioapi_0.10 yaml_2.2.0 haven_2.1.1 xfun_0.8
[21] withr_2.1.2 xml2_1.2.0 httr_1.4.0 knitr_1.23
[25] vctrs_0.2.0 hms_0.5.0 generics_0.0.2 fs_1.3.1
[29] grid_3.6.1 tidyselect_0.2.5 glue_1.3.1 R6_2.4.0
[33] readxl_1.3.1 rmarkdown_1.14 modelr_0.1.4 magrittr_1.5
[37] backports_1.1.4 scales_1.0.0 htmltools_0.3.6 rvest_0.3.4
[41] assertthat_0.2.1 colorspace_1.4-1 stringi_1.4.3 lazyeval_0.2.2
[45] munsell_0.5.0 broom_0.5.2 crayon_1.3.4
This R Markdown site was created with workflowr