Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2018c.
1.0/zoneinfo/America/New_York'

Last updated: 2018-05-15

Code version: 62e32da

Purpose

This code is intended to extract and store data on US crop acreage from the US Agricultural Census and USDA’s County Agricultural Production Survey. The census is more comprehensive but is conducted only every 5 years. The annual survey is more frequent but has less comprehensive geographic coverage.

Note: These are large data files (1-6 GB) and this code takes hours to run.

Data sources

The datasets for both programs were downloaded directly from the ‘back side’ of USDA’s Quick Stats database. The files can be accessed by going to the Quick Stats developer page and clicking the link toward the bottom to download the files. Each file was appended with the download date in the format YYYYMMDD.

  • Agricultural Census files for 2002, 2007, and 2012 (date is download date)
    • qs.census2002.txt
    • qs.census2007.txt
    • qs.census2012.txt
  • Other files (including CAPS; date is download date)
    • qs.crops.txt
    • qs.economics.txt
    • qs.environmental.txt

Libraries & functions

library(data.table)
library(tidyverse)

Agricultural Census

2012 data

Load data

qs.census2012 <- read.delim("../data_big/ag_census/qs.census2012_20170502.txt", dec=",")
str(qs.census2012)
table(qs.census2012$GROUP_DESC, qs.census2012$STATISTICCAT_DESC)

Extract national data

# Select national acreage data, including individual crops and overall 'farmland' estimates (these are under 'economics')
qs.census2012.nat.crop.ac<-filter(qs.census2012, 
                                 AGG_LEVEL_DESC=="NATIONAL" & 
                                   FREQ_DESC =="ANNUAL" &
                                   (SECTOR_DESC=="CROPS" |SECTOR_DESC=="ECONOMICS") &
                                   UNIT_DESC=="ACRES" &
                                   (GROUP_DESC=="FIELD CROPS"|
                                      GROUP_DESC=="FRUIT & TREE NUTS"|
                                      GROUP_DESC=="VEGETABLES"|
                                      GROUP_DESC=="FARMS & LAND & ASSETS"))

# Save file
write.csv(qs.census2012.nat.crop.ac, 
          file = "../output_big/ag_census/qs.census2012.nat.crop.ac_20170502.csv")

Extract state data

# Select state acreage data, including individual crops and overall 'farmland' estimates (these are under 'economics')
qs.census2012.st.crop.ac<-filter(qs.census2012, 
                           AGG_LEVEL_DESC=="STATE" & 
                           FREQ_DESC =="ANNUAL" &
                           (SECTOR_DESC=="CROPS" |SECTOR_DESC=="ECONOMICS") &
                           UNIT_DESC=="ACRES" &
                           (GROUP_DESC=="FIELD CROPS"|
                              GROUP_DESC=="FRUIT & TREE NUTS"|
                              GROUP_DESC=="VEGETABLES"|
                              GROUP_DESC=="FARMS & LAND & ASSETS"))

# Save file
write.csv(qs.census2012.st.crop.ac, 
          file = "../output_big/ag_census/qs.census2012.st.crop.ac_20170502.csv")

Extract county data

# Select county acreage data, including individual crops and overall 'farmland' estimates (these are under 'economics')
qs.census2012.cty.crop.ac<-subset(qs.census2012, 
                                  AGG_LEVEL_DESC=="COUNTY" & 
                                    FREQ_DESC =="ANNUAL" &
                                    (SECTOR_DESC=="CROPS" |SECTOR_DESC=="ECONOMICS") &
                                    UNIT_DESC=="ACRES" &
                                    (GROUP_DESC=="FIELD CROPS"|
                                       GROUP_DESC=="FRUIT & TREE NUTS"|
                                       GROUP_DESC=="VEGETABLES"|
                                       GROUP_DESC=="FARMS & LAND & ASSETS"))

# Save file
write.csv(qs.census2012.cty.crop.ac, 
          file = "../output_big/ag_census/qs.census2012.cty.crop.ac_20170502.csv")

2007 data

Load data

qs.census2007 <- read.delim("../data_big/ag_census/qs.census2007_20170502.txt", dec=",")
str(qs.census2007)
table(qs.census2007$GROUP_DESC, qs.census2007$STATISTICCAT_DESC)

Extract national data

# Select national acreage data, including individual crops and overall 'farmland' estimates (these are under 'economics')
qs.census2007.nat.crop.ac<-filter(qs.census2007, 
                                 AGG_LEVEL_DESC=="NATIONAL" & 
                                   FREQ_DESC =="ANNUAL" &
                                   (SECTOR_DESC=="CROPS" |SECTOR_DESC=="ECONOMICS") &
                                   UNIT_DESC=="ACRES" &
                                   (GROUP_DESC=="FIELD CROPS"|
                                      GROUP_DESC=="FRUIT & TREE NUTS"|
                                      GROUP_DESC=="VEGETABLES"|
                                      GROUP_DESC=="FARMS & LAND & ASSETS"))

# Save file
write.csv(qs.census2007.nat.crop.ac, 
          file = "../output_big/ag_census/qs.census2007.nat.crop.ac_20170502.csv")

Extract state data

# Select state acreage data, including individual crops and overall 'farmland' estimates (these are under 'economics')
qs.census2007.st.crop.ac<-filter(qs.census2007, 
                           AGG_LEVEL_DESC=="STATE" & 
                           FREQ_DESC =="ANNUAL" &
                           (SECTOR_DESC=="CROPS" |SECTOR_DESC=="ECONOMICS") &
                           UNIT_DESC=="ACRES" &
                           (GROUP_DESC=="FIELD CROPS"|
                              GROUP_DESC=="FRUIT & TREE NUTS"|
                              GROUP_DESC=="VEGETABLES"|
                              GROUP_DESC=="FARMS & LAND & ASSETS"))

# Save file
write.csv(qs.census2007.st.crop.ac, 
          file = "../output_big/ag_census/qs.census2007.st.crop.ac_20170502.csv")

Extract county data

# Select county acreage data, including individual crops and overall 'farmland' estimates (these are under 'economics')
qs.census2007.cty.crop.ac<-subset(qs.census2007, 
                                  AGG_LEVEL_DESC=="COUNTY" & 
                                    FREQ_DESC =="ANNUAL" &
                                    (SECTOR_DESC=="CROPS" |SECTOR_DESC=="ECONOMICS") &
                                    UNIT_DESC=="ACRES" &
                                    (GROUP_DESC=="FIELD CROPS"|
                                       GROUP_DESC=="FRUIT & TREE NUTS"|
                                       GROUP_DESC=="VEGETABLES"|
                                       GROUP_DESC=="FARMS & LAND & ASSETS"))

# Save file
write.csv(qs.census2007.cty.crop.ac, 
          file = "../output_big/ag_census/qs.census2007.cty.crop.ac_20170502.csv")

2002 data

Load data

qs.census2002 <- read.delim("../data_big/ag_census/qs.census2002_20170502.txt", dec=",")
str(qs.census2002)
table(qs.census2002$GROUP_DESC, qs.census2002$STATISTICCAT_DESC)

Extract national data

# Select national acreage data, including individual crops and overall 'farmland' estimates (these are under 'economics')
qs.census2002.nat.crop.ac<-filter(qs.census2002, 
                                 AGG_LEVEL_DESC=="NATIONAL" & 
                                   FREQ_DESC =="ANNUAL" &
                                   (SECTOR_DESC=="CROPS" |SECTOR_DESC=="ECONOMICS") &
                                   UNIT_DESC=="ACRES" &
                                   (GROUP_DESC=="FIELD CROPS"|
                                      GROUP_DESC=="FRUIT & TREE NUTS"|
                                      GROUP_DESC=="VEGETABLES"|
                                      GROUP_DESC=="FARMS & LAND & ASSETS"))

# Save file
write.csv(qs.census2002.nat.crop.ac, 
          file = "../output_big/ag_census/qs.census2002.nat.crop.ac_20170502.csv")

Extract state data

# Select state acreage data, including individual crops and overall 'farmland' estimates (these are under 'economics')
qs.census2002.st.crop.ac<-filter(qs.census2002, 
                           AGG_LEVEL_DESC=="STATE" & 
                           FREQ_DESC =="ANNUAL" &
                           (SECTOR_DESC=="CROPS" |SECTOR_DESC=="ECONOMICS") &
                           UNIT_DESC=="ACRES" &
                           (GROUP_DESC=="FIELD CROPS"|
                              GROUP_DESC=="FRUIT & TREE NUTS"|
                              GROUP_DESC=="VEGETABLES"|
                              GROUP_DESC=="FARMS & LAND & ASSETS"))

# Save file
write.csv(qs.census2002.st.crop.ac, 
          file = "../output_big/ag_census/qs.census2002.st.crop.ac_20170502.csv")

Extract county data

# Select county acreage data, including individual crops and overall 'farmland' estimates (these are under 'economics')
qs.census2002.cty.crop.ac<-subset(qs.census2002, 
                                  AGG_LEVEL_DESC=="COUNTY" & 
                                    FREQ_DESC =="ANNUAL" &
                                    (SECTOR_DESC=="CROPS" |SECTOR_DESC=="ECONOMICS") &
                                    UNIT_DESC=="ACRES" &
                                    (GROUP_DESC=="FIELD CROPS"|
                                       GROUP_DESC=="FRUIT & TREE NUTS"|
                                       GROUP_DESC=="VEGETABLES"|
                                       GROUP_DESC=="FARMS & LAND & ASSETS"))

# Save file
write.csv(qs.census2002.cty.crop.ac, 
          file = "../output_big/ag_census/qs.census2002.cty.crop.ac_20170502.csv")

Annual crop survey (CAPS)

Crop data

Load data

qs.crops<-fread("../data_big/nass_survey/qs.crops_20170519.txt")
str(qs.crops)

Subset acreage data

# Extract acreage data
# domain_desc=TOTAL excludes acreage data broken down by operation size, etc.

qs.crops.ac<-filter(qs.crops, 
                    UNIT_DESC=="ACRES" &
                      DOMAIN_DESC=="TOTAL")

Subset by geography

with(qs.crops.ac, table(AGG_LEVEL_DESC))
qs.crops.ac.nat<-filter(qs.crops.ac, AGG_LEVEL_DESC=="NATIONAL")
qs.crops.ac.st<-filter(qs.crops.ac, AGG_LEVEL_DESC=="STATE")
qs.crops.ac.cty<-filter(qs.crops.ac, AGG_LEVEL_DESC=="COUNTY")

Save files

write.csv(qs.crops.ac.nat, "../output_big/nass_survey/qs.crops.ac.nat_20170519.csv")
write.csv(qs.crops.ac.st, "../output_big/nass_survey/qs.crops.ac.st_20170519.csv")
write.csv(qs.crops.ac.cty, "../output_big/nass_survey/qs.crops.ac.cty_20170519.csv")

Pastureland, fallow land, and non-ag

Load data

qs.economics <- fread("../data_big/nass_survey/qs.economics_20170519.txt")
str(qs.economics)

Subset acreage data

# Extract acreage data
# domain_desc=TOTAL excludes acreage data broken down by operation size, etc.
# commodity_desc = AG LAND selects only data items having to do with land area
# short_desc = "LAND AREA - INCL, NON-AG - ACRES" selects data items for total land area

qs.economics.ac<-subset(qs.economics, 
                    UNIT_DESC=="ACRES" &
                      DOMAIN_DESC=="TOTAL" &
                      (COMMODITY_DESC=="AG LAND"|SHORT_DESC=="LAND AREA, INCL NON-AG - ACRES"))
str(qs.economics.ac)

Subset by geography

with(qs.economics.ac, table(AGG_LEVEL_DESC))
qs.economics.ac.nat<-filter(qs.economics.ac, AGG_LEVEL_DESC=="NATIONAL")
qs.economics.ac.st<-filter(qs.economics.ac, AGG_LEVEL_DESC=="STATE")
qs.economics.ac.cty<-filter(qs.economics.ac, AGG_LEVEL_DESC=="COUNTY")

Save files

write.csv(qs.economics.ac.nat, "../output_big/nass_survey/qs.economics.ac.nat_20170519.csv")
write.csv(qs.economics.ac.st, "../output_big/nass_survey/qs.economics.ac.st_20170519.csv")
write.csv(qs.economics.ac.cty, "../output_big/nass_survey/qs.economics.ac.cty_20170519.csv")

Land treated

Load data

qs.environment <- fread("../data_big/nass_survey/qs.environmental_20170519.txt")
Warning in fread("../data_big/nass_survey/qs.environmental_20170519.txt"):
Bumped column 26 to type character on data row 169, field contains 'NEW
ENGLAND'. Coercing previously read values in this column from logical,
integer or numeric back to character which may not be lossless; e.g., if
'00' and '000' occurred before they will now be just '0', and there may
be inconsistencies with treatment of ',,' and ',NA,' too (if they occurred
in this column before the bump). If this matters please rerun and set
'colClasses' to 'character' for this column. Please note that column type
detection uses a sample of 1,000 rows (100 rows at 10 points) so hopefully
this message should be very rare. If reporting to datatable-help, please
rerun and include the output from verbose=TRUE.

Read 51.9% of 1001914 rows
Read 1001914 rows and 39 (of 39) columns from 0.359 GB file in 00:00:03
str(qs.environment)
Classes 'data.table' and 'data.frame':  1001914 obs. of  39 variables:
 $ SOURCE_DESC          : chr  "CENSUS" "CENSUS" "CENSUS" "CENSUS" ...
 $ SECTOR_DESC          : chr  "ENVIRONMENTAL" "ENVIRONMENTAL" "ENVIRONMENTAL" "ENVIRONMENTAL" ...
 $ GROUP_DESC           : chr  "FIELD CROPS" "FIELD CROPS" "FIELD CROPS" "FIELD CROPS" ...
 $ COMMODITY_DESC       : chr  "SORGHUM" "WHEAT" "WHEAT" "COTTON" ...
 $ CLASS_DESC           : chr  "ALL CLASSES" "ALL CLASSES" "ALL CLASSES" "ALL CLASSES" ...
 $ PRODN_PRACTICE_DESC  : chr  "IRRIGATED, CHEMIGATION" "IRRIGATED, CHEMIGATION" "IRRIGATED, CHEMIGATION" "IRRIGATED, CHEMIGATION" ...
 $ UTIL_PRACTICE_DESC   : chr  "GRAIN" "ALL UTILIZATION PRACTICES" "ALL UTILIZATION PRACTICES" "ALL UTILIZATION PRACTICES" ...
 $ STATISTICCAT_DESC    : chr  "TREATED" "TREATED" "TREATED" "TREATED" ...
 $ UNIT_DESC            : chr  "OPERATIONS" "OPERATIONS" "ACRES" "OPERATIONS" ...
 $ SHORT_DESC           : chr  "SORGHUM, GRAIN, IRRIGATED, CHEMIGATION - OPERATIONS WITH TREATED" "WHEAT, IRRIGATED, CHEMIGATION - OPERATIONS WITH TREATED" "WHEAT, IRRIGATED, CHEMIGATION - TREATED, MEASURED IN ACRES" "COTTON, IRRIGATED, CHEMIGATION - OPERATIONS WITH TREATED" ...
 $ DOMAIN_DESC          : chr  "CHEMICAL" "FERTILIZER" "FERTILIZER" "FERTILIZER" ...
 $ DOMAINCAT_DESC       : chr  "CHEMICAL: (TOTAL)" "FERTILIZER: (TOTAL)" "FERTILIZER: (TOTAL)" "FERTILIZER: (TOTAL)" ...
 $ AGG_LEVEL_DESC       : chr  "NATIONAL" "NATIONAL" "NATIONAL" "NATIONAL" ...
 $ STATE_ANSI           : int  NA NA NA NA NA NA NA 1 1 1 ...
 $ STATE_FIPS_CODE      : int  99 99 99 99 99 99 99 1 1 1 ...
 $ STATE_ALPHA          : chr  "US" "US" "US" "US" ...
 $ STATE_NAME           : chr  "US TOTAL" "US TOTAL" "US TOTAL" "US TOTAL" ...
 $ ASD_CODE             : int  NA NA NA NA NA NA NA NA NA NA ...
 $ ASD_DESC             : chr  "" "" "" "" ...
 $ COUNTY_ANSI          : int  NA NA NA NA NA NA NA NA NA NA ...
 $ COUNTY_CODE          : int  NA NA NA NA NA NA NA NA NA NA ...
 $ COUNTY_NAME          : chr  "" "" "" "" ...
 $ REGION_DESC          : chr  "" "" "" "" ...
 $ ZIP_5                : logi  NA NA NA NA NA NA ...
 $ WATERSHED_CODE       : int  0 0 0 0 0 0 0 0 0 0 ...
 $ WATERSHED_DESC       : chr  "" "" "" "" ...
 $ CONGR_DISTRICT_CODE  : logi  NA NA NA NA NA NA ...
 $ COUNTRY_CODE         : int  9000 9000 9000 9000 9000 9000 9000 9000 9000 9000 ...
 $ COUNTRY_NAME         : chr  "UNITED STATES" "UNITED STATES" "UNITED STATES" "UNITED STATES" ...
 $ LOCATION_DESC        : chr  "US TOTAL" "US TOTAL" "US TOTAL" "US TOTAL" ...
 $ YEAR                 : int  2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
 $ FREQ_DESC            : chr  "ANNUAL" "ANNUAL" "ANNUAL" "ANNUAL" ...
 $ BEGIN_CODE           : int  0 0 0 0 0 0 0 0 0 0 ...
 $ END_CODE             : int  0 0 0 0 0 0 0 0 0 0 ...
 $ REFERENCE_PERIOD_DESC: chr  "YEAR" "YEAR" "YEAR" "YEAR" ...
 $ WEEK_ENDING          : logi  NA NA NA NA NA NA ...
 $ LOAD_TIME            : chr  "2012-12-31 00:00:00" "2012-12-31 00:00:00" "2012-12-31 00:00:00" "2012-12-31 00:00:00" ...
 $ VALUE                : chr  "80" "2,290" "732,345" "1,663" ...
 $ CV_%                 : chr  "" "" "" "" ...
 - attr(*, ".internal.selfref")=<externalptr> 

Subset treatment data

# domain_desc=TOTAL excludes acreage data broken down by operation size, etc.
# commodity_desc=AG LAND selects data items having to do with treatments of total ag land
# statisticcat_desc = TREATED selects only data items having to do with chemical treatments

qs.environment.trt <- subset(qs.environment, 
                    UNIT_DESC=="ACRES" &
                      COMMODITY_DESC == "AG LAND" &
                      STATISTICCAT_DESC=="TREATED" &
                      SHORT_DESC=="AG LAND - TREATED, MEASURED IN ACRES")
str(qs.environment.trt)

Subset by geography

with(qs.environment.trt, table(AGG_LEVEL_DESC))
qs.environment.trt.nat<-filter(qs.environment.trt, AGG_LEVEL_DESC=="NATIONAL")
qs.environment.trt.st<-filter(qs.environment.trt, AGG_LEVEL_DESC=="STATE")
qs.environment.trt.cty<-filter(qs.environment.trt, AGG_LEVEL_DESC=="COUNTY")

Save files

write.csv(qs.environment.trt.nat, "../output_big/nass_survey/qs.environment.trt.nat_20170519.csv")
write.csv(qs.environment.trt.st, "../output_big/nass_survey/qs.environment.trt.st_20170519.csv")
write.csv(qs.environment.trt.cty, "../output_big/nass_survey/qs.environment.trt.cty_20170519.csv")

Session information

sessionInfo()
R version 3.3.3 (2017-03-06)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS  10.13.3

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] forcats_0.2.0       stringr_1.2.0       dplyr_0.7.4        
 [4] purrr_0.2.4         readr_1.1.1         tidyr_0.7.2        
 [7] tibble_1.4.2        ggplot2_2.2.1       tidyverse_1.2.1    
[10] data.table_1.10.4-3

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.14     cellranger_1.1.0 pillar_1.1.0     git2r_0.21.0    
 [5] plyr_1.8.4       bindr_0.1        tools_3.3.3      digest_0.6.14   
 [9] lubridate_1.7.1  jsonlite_1.5     evaluate_0.10.1  nlme_3.1-131    
[13] gtable_0.2.0     lattice_0.20-34  pkgconfig_2.0.1  rlang_0.1.6     
[17] psych_1.7.8      cli_1.0.0        rstudioapi_0.7   yaml_2.1.16     
[21] parallel_3.3.3   haven_1.1.1      bindrcpp_0.2     xml2_1.1.1      
[25] httr_1.3.1       knitr_1.18       hms_0.4.0        rprojroot_1.3-2 
[29] grid_3.3.3       glue_1.2.0       R6_2.2.2         readxl_1.0.0    
[33] foreign_0.8-67   rmarkdown_1.8    modelr_0.1.1     reshape2_1.4.3  
[37] magrittr_1.5     backports_1.1.2  scales_0.5.0     htmltools_0.3.6 
[41] rvest_0.3.2      assertthat_0.2.0 mnormt_1.5-5     colorspace_1.3-2
[45] stringi_1.1.6    lazyeval_0.2.1   munsell_0.4.3    broom_0.4.3     
[49] crayon_1.3.4    

This R Markdown site was created with workflowr