From cab7bc540a0c3b5ead53c6d2b5ac5b6b3664938b Mon Sep 17 00:00:00 2001 From: hemamipour Date: Tue, 21 May 2024 19:53:01 -0500 Subject: [PATCH] Create 2019neiEmis_1.ipynb Work-in-progress effort for processing the 2019 NEI using the Julia language. --- examples/2019neiEmis_1.ipynb | 1396 ++++++++++++++++++++++++++++++++++ 1 file changed, 1396 insertions(+) create mode 100644 examples/2019neiEmis_1.ipynb diff --git a/examples/2019neiEmis_1.ipynb b/examples/2019neiEmis_1.ipynb new file mode 100644 index 00000000..b35222d5 --- /dev/null +++ b/examples/2019neiEmis_1.ipynb @@ -0,0 +1,1396 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Processing the US EPA's 2019 National Emissions Inventory\n", + "\n", + "This notebook contains a work-in-progress effort for processing the 2019 NEI using the [Julia language](https://julialang.org/).\n", + "\n", + "First, we need to load the packages that we will use:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "using Revise\n", + "using CSV\n", + "using ZipFile\n", + "using DataFrames\n", + "using DBFTables\n", + "using HTTP\n", + "using Unitful\n", + "using FilePathsBase\n", + "using Proj4\n", + "using Shapefile\n", + "using JSON\n", + "using GeometryBasics\n", + "using LibGEOS\n", + "using GeoInterface\n", + "using GeoFormatTypes\n", + "using SparseArrays\n", + "using Printf\n", + "using Plots" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set download directory\n", + "\n", + "Next, we need to choose a directory on our computer to download the emissions data to. You will probably need to change the directory below to a location that exists on your computer" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"C:/Users/hemamipo/OneDrive - University of Illinois - Urbana/Research-UIUC/Tessum Group/Tasks/NEI_Processor/2019nei\"" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir = \"/Users/$(ENV[\"USER\"])/data/2019nei\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download emissions files from the EPA web server\n", + "\n", + "In this step, we download and unzip all of the emissions files.\n", + "\n", + "You only need to do this once." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "urls = [\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_CMV_c1c2_inventory_16mar2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_CMV_c3_inventory_16mar2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_cem_inventory_10mar2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_fertilizer_inventory_04apr2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_fires_inventory_10mar2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_nonpoint_inventory_10mar2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_nonroad_inventory_10mar2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_onroad_SMOKE-MOVES_emissions_FF10_allHAPs_30nov2021_v0.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_onroad_activity_10mar2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_oth_inventory_17mar2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/2019emissions/2019ge_point_inventory_10mar2023.zip\",\n", + " \"https://gaftp.epa.gov/Air/emismod/2019/ancillary_data/2019ge_gridding_ge_dat_10mar2023.zip\"\n", + "]\n", + "\n", + "for url in urls\n", + " println(\"Downloading $(url)\")\n", + " http_response = HTTP.get(url, require_ssl_verification = false)\n", + " r = ZipFile.Reader(IOBuffer(http_response.body));\n", + " for f in r.files\n", + " println(\"Filename: $(f.name)\")\n", + " file_path = joinpath(dir, f.name)\n", + " if f.name[end] == '/'\n", + " mkpath(file_path)\n", + " continue\n", + " end\n", + " mkpath(dirname(file_path))\n", + " open(file_path, \"w\") do o\n", + " write(o, read(f))\n", + " end\n", + " end\n", + "end" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Utility functions\n", + "\n", + "This next cell contains some code that we will use to load and process the emissions. It will eventually be moved into a separate software library." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "writeEmis (generic function with 1 method)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "const tonperyear = 907.185u\"kg\" / 31_536_000u\"s\"\n", + "const tonpermonth = 907.185u\"kg\" / 2_628_288u\"s\"\n", + "const foot = (1/3.28084)u\"m\"\n", + "kelvin(F) = ((F − 32.0) * 5.0/9.0 + 273.15) * u\"K\"\n", + "\n", + "abstract type EmissionsDataFrame end\n", + "\n", + "# SurrogateSpec holds surrogate specification data\n", + "struct SurrogateSpec\n", + " Region::String\n", + " Name::String\n", + " Code::Int\n", + " DataShapefile::String\n", + " DataAttribute::String\n", + " WeightShapefile::String\n", + " Details::String\n", + " BackupSurrogateNames::Vector{String}\n", + " WeightColumns::Vector{String}\n", + " WeightFactors::Vector{Float64}\n", + " FilterFunction::String\n", + " MergeNames::Vector{String}\n", + " MergeMultipliers::Vector{Float64}\n", + "end\n", + "\n", + "# GridDef specifies the grid that we are allocating the emissions to.\n", + "struct GridDef\n", + " Name::String\n", + " Nx::Int\n", + " Ny::Int\n", + " SR::String\n", + " Cells::Vector{LibGEOS.Polygon}\n", + " Extent::Vector{Vector{Tuple{Float64, Float64}}}\n", + "end\n", + "\n", + "# SpatialProcessor spatializes emissions records.\n", + "struct SpatialProcessor\n", + " SrgSpecs::Vector{SurrogateSpec}\n", + " Grids::GridDef\n", + " GridRef::DataFrame\n", + " InputSR::String\n", + " MatchFullSCC::Bool\n", + " MemCacheSize::Int\n", + " MaxMergeDepth::Int\n", + "end\n", + "\n", + "# Config holds configuration data.\n", + "struct Config\n", + " f_gridRef::Vector{String}\n", + " SrgSpec::String\n", + " SrgShapefileDirectory::String\n", + " InputSR::String\n", + " OutputSR::String\n", + " GridFile::String\n", + " GridName::String\n", + " Counties::String\n", + " EmisShp::String\n", + "end\n", + "\n", + "# IndexInfo holds grid index information for gridded emissions.\n", + "struct IndexInfo\n", + " rows::Vector{Int}\n", + " cols::Vector{Int}\n", + " fracs::Vector{Float64}\n", + " inGrid::Bool\n", + " coveredByGrid::Bool\n", + "end\n", + "\n", + "# Pollutants holds pollutant groups.\n", + "Pollutants = Dict(\n", + " \"EVP__VOC\" => \"VOC\", \"EXH__VOC\" => \"VOC\", \"VOC\" => \"VOC\", \"VOC_INV\" => \"VOC\",\n", + " \"NOX\" => \"NOX\",\n", + " \"NH3\" => \"NH3\",\n", + " \"SO2\" => \"SO2\",\n", + " \"BRK__PM25-PRI\" => \"PM25\", \"EXH__PM25-PRI\" => \"PM25\", \"EXH__PM2_5\" => \"PM25\", \n", + " \"PM25-PRI\" => \"PM25\", \"PM25TOTAL\" => \"PM25\", \"PM2_5\" => \"PM25\", \"TIR__PM25-PRI\" => \"PM25\",\n", + " \"BRK__PM2_5\" => \"PM25\", \"TIR__PM2_5\" => \"PM25\",\n", + ")\n", + "\n", + "# https://www.cmascenter.org/smoke/documentation/4.8.1/html/ch08s02s04.html#d0e38214\n", + "struct FF10NonPointDataFrame <: EmissionsDataFrame\n", + " df::DataFrame\n", + " \n", + " FF10NonPointDataFrame(df::DataFrame) = begin\n", + " if size(df, 2) != 45\n", + " throw(DimensionMismatch(\"FF10 nonpoint file should have 45 fields but instead has $(size(df,2))\"))\n", + " end\n", + " \n", + " rename!(df, [\"COUNTRY\", \"FIPS\", \"TRIBAL_CODE\", \"CENSUS_TRACT\", \"SHAPE_ID\", \"SCC\",\n", + " \"EMIS_TYPE\", \"POLID\", \"ANN_VALUE\",\n", + " \"ANN_PCT_RED\", \"CONTROL_IDS\", \"CONTROL_MEASURES\", \"CURRENT_COST\", \"CUMULATIVE_COST\", \"PROJECTION_FACTOR\",\n", + " \"REG_CODES\", \"CALC_METHOD\", \"CALC_YEAR\", \"DATE_UPDATED\", \"DATA_SET_ID\", \"JAN_VALUE\", \"FEB_VALUE\", \"MAR_VALUE\",\n", + " \"APR_VALUE\", \"MAY_VALUE\", \"JUN_VALUE\", \"JUL_VALUE\", \"AUG_VALUE\", \"SEP_VALUE\", \"OCT_VALUE\", \"NOV_VALUE\", \"DEC_VALUE\",\n", + " \"JAN_PCTRED\", \"FEB_PCTRED\", \"MAR_PCTRED\", \"APR_PCTRED\", \"MAY_PCTRED\", \"JUN_PCTRED\", \"JUL_PCTRED\", \"AUG_PCTRED\",\n", + " \"SEP_PCTRED\", \"OCT_PCTRED\", \"NOV_PCTRED\", \"DEC_PCTRED\", \"COMMENT\"])\n", + " \n", + " df[!, :FIPS] = begin\n", + " fips_transformed = String[]\n", + " for fips in df[!, :FIPS]\n", + " fips_str = string(fips)\n", + " if length(fips_str) == 6\n", + " fips_str = fips_str[2:end]\n", + " end\n", + " push!(fips_transformed, lpad(fips_str, 5, '0'))\n", + " end\n", + " fips_transformed\n", + " end\n", + "\n", + " df[!, :SCC] = [lpad(scc, 10, '0') for scc in string.(df[!, :SCC])]\n", + " \n", + " df.ANN_VALUE = df.ANN_VALUE * tonperyear\n", + " \n", + " df.JAN_VALUE = df.JAN_VALUE * tonpermonth\n", + " df.FEB_VALUE = df.FEB_VALUE * tonpermonth\n", + " df.MAR_VALUE = df.MAR_VALUE * tonpermonth\n", + " df.APR_VALUE = df.APR_VALUE * tonpermonth\n", + " df.MAY_VALUE = df.MAY_VALUE * tonpermonth\n", + " df.JUN_VALUE = df.JUN_VALUE * tonpermonth\n", + " df.JUL_VALUE = df.JUL_VALUE * tonpermonth\n", + " df.AUG_VALUE = df.AUG_VALUE * tonpermonth\n", + " df.SEP_VALUE = df.SEP_VALUE * tonpermonth\n", + " df.OCT_VALUE = df.OCT_VALUE * tonpermonth\n", + " df.NOV_VALUE = df.NOV_VALUE * tonpermonth\n", + " df.DEC_VALUE = df.DEC_VALUE * tonpermonth\n", + " \n", + " return new(df)\n", + " end\n", + "end\n", + "\n", + "struct FF10NonRoadDataFrame <: EmissionsDataFrame\n", + " df::DataFrame\n", + " \n", + " FF10NonRoadDataFrame(df::DataFrame) = new(FF10NonPointDataFrame(df).df)\n", + "end\n", + "\n", + "struct FF10OnRoadDataFrame <: EmissionsDataFrame\n", + " df::DataFrame\n", + " \n", + " FF10OnRoadDataFrame(df::DataFrame) = new(FF10NonPointDataFrame(df).df)\n", + "end\n", + "\n", + "# https://www.cmascenter.org/smoke/documentation/4.8.1/html/ch08s02s08.html#sect_input_ptinv_ff10\n", + "struct FF10PointDataFrame <: EmissionsDataFrame\n", + " df::DataFrame\n", + " \n", + " FF10PointDataFrame(df::DataFrame) = begin\n", + " if size(df, 2) != 77\n", + " throw(DimensionMismatch(\"FF10 point file should have 77 fields but instead has $(size(df,2))\"))\n", + " end\n", + " \n", + " rename!(df, [\n", + " \"COUNTRY\",\"FIPS\",\"TRIBAL_CODE\",\"FACILITY_ID\",\"UNIT_ID\",\"REL_POINT_ID\",\"PROCESS_ID\",\"AGY_FACILITY_ID\",\"AGY_UNIT_ID\", \n", + " \"AGY_REL_POINT_ID\",\"AGY_PROCESS_ID\",\"SCC\",\"POLID\",\"ANN_VALUE\",\"ANN_PCT_RED\",\"FACILITY_NAME\",\"ERPTYPE\",\"STKHGT\",\n", + " \"STKDIAM\",\"STKTEMP\",\"STKFLOW\",\"STKVEL\",\"NAICS\",\"LONGITUDE\",\"LATITUDE\",\"LL_DATUM\",\"HORIZ_COLL_MTHD\",\"DESIGN_CAPACITY\",\n", + " \"DESIGN_CAPACITY_UNITS\",\"REG_CODES\",\"FAC_SOURCE_TYPE\",\"UNIT_TYPE_CODE\",\"CONTROL_IDS\",\"CONTROL_MEASURES\",\n", + " \"CURRENT_COST\",\"CUMULATIVE_COST\",\"PROJECTION_FACTOR\",\"SUBMITTER_FAC_ID\",\"CALC_METHOD\",\"DATA_SET_ID\",\"FACIL_CATEGORY_CODE\",\n", + " \"ORIS_FACILITY_CODE\",\"ORIS_BOILER_ID\",\"IPM_YN\",\"CALC_YEAR\",\"DATE_UPDATED\",\"FUG_HEIGHT\",\"FUG_WIDTH_XDIM\",\"FUG_LENGTH_YDIM\",\n", + " \"FUG_ANGLE\",\"ZIPCODE\",\"ANNUAL_AVG_HOURS_PER_YEAR\",\"JAN_VALUE\", \"FEB_VALUE\", \"MAR_VALUE\",\n", + " \"APR_VALUE\", \"MAY_VALUE\", \"JUN_VALUE\", \"JUL_VALUE\", \"AUG_VALUE\", \"SEP_VALUE\", \"OCT_VALUE\", \"NOV_VALUE\", \"DEC_VALUE\",\n", + " \"JAN_PCTRED\", \"FEB_PCTRED\", \"MAR_PCTRED\", \"APR_PCTRED\", \"MAY_PCTRED\", \"JUN_PCTRED\", \"JUL_PCTRED\", \"AUG_PCTRED\",\n", + " \"SEP_PCTRED\", \"OCT_PCTRED\", \"NOV_PCTRED\", \"DEC_PCTRED\", \"COMMENT\" \n", + " ])\n", + " \n", + " df[!, :FIPS] = begin\n", + " fips_transformed = String[]\n", + " for fips in df[!, :FIPS]\n", + " fips_str = string(fips)\n", + " if length(fips_str) == 6\n", + " fips_str = fips_str[2:end]\n", + " end\n", + " push!(fips_transformed, lpad(fips_str, 5, '0'))\n", + " end\n", + " fips_transformed\n", + " end\n", + "\n", + " \n", + " df[!, :SCC] = [lpad(scc, 10, '0') for scc in string.(df[!, :SCC])]\n", + " \n", + " df.ANN_VALUE = df.ANN_VALUE * tonperyear\n", + " \n", + " df.JAN_VALUE = df.JAN_VALUE * tonpermonth\n", + " df.FEB_VALUE = df.FEB_VALUE * tonpermonth\n", + " df.MAR_VALUE = df.MAR_VALUE * tonpermonth\n", + " df.APR_VALUE = df.APR_VALUE * tonpermonth\n", + " df.MAY_VALUE = df.MAY_VALUE * tonpermonth\n", + " df.JUN_VALUE = df.JUN_VALUE * tonpermonth\n", + " df.JUL_VALUE = df.JUL_VALUE * tonpermonth\n", + " df.AUG_VALUE = df.AUG_VALUE * tonpermonth\n", + " df.SEP_VALUE = df.SEP_VALUE * tonpermonth\n", + " df.OCT_VALUE = df.OCT_VALUE * tonpermonth\n", + " df.NOV_VALUE = df.NOV_VALUE * tonpermonth\n", + " df.DEC_VALUE = df.DEC_VALUE * tonpermonth\n", + " \n", + " df.STKHGT = df.STKHGT * foot\n", + " df.STKDIAM = df.STKDIAM * foot\n", + " df.STKTEMP = kelvin.(df.STKTEMP)\n", + " df.STKFLOW = df.STKFLOW * foot * foot * foot / u\"s\"\n", + " df.STKVEL = df.STKVEL * foot / u\"s\"\n", + " \n", + " # Fill in missing parameters.\n", + " # Commented out for now.\n", + " #flowmissing = ismissing.(df.STKFLOW)\n", + " #df.STKFLOW[flowmissing] .= df.STKVEL[flowmissing] .* (df.STKDIAM[flowmissing] .* df.STKDIAM[flowmissing] / 4 * π)\n", + " \n", + " #velmissing = ismissing.(df.STKVEL)\n", + " #df.STKVEL[velmissing] .= df.STKVEL[velmissing] ./ (df.STKDIAM[velmissing] .* df.STKDIAM[velmissing] / 4 * π)\n", + " \n", + " return new(df)\n", + " end\n", + "end\n", + "\n", + "strip_missing(x) = ismissing(x) ? \"\" : strip(x)\n", + "\n", + "# getCountry returns the Country associated with this code.\n", + "function getCountry(code)\n", + " countryMap = Dict(\n", + " \"0\" => \"USA\",\n", + " \"1\" => \"Canada\",\n", + " \"2\" => \"Mexico\",\n", + " \"3\" => \"Cuba\",\n", + " \"4\" => \"Bahamas\",\n", + " \"5\" => \"Haiti\",\n", + " \"6\" => \"Dominican Republic\",\n", + " \"7\" => \"Global\"\n", + " )\n", + " return get(countryMap, code, \"Unknown\")\n", + "end\n", + "\n", + "# read_grid reads grid file data.\n", + "function read_grid(file_path)\n", + " data = DataFrame(COUNTRY=String[], FIPS=String[], SCC=String[], Surrogate=String[])\n", + " open(file_path) do file\n", + " for line in eachline(file)\n", + " if startswith(line, '#') || !contains(line, ';')\n", + " continue\n", + " end\n", + "\n", + " parts = split(line, ';', limit=3)\n", + "\n", + " if length(parts) >= 3\n", + " surrogate_part = split(parts[3], '!', limit=2)[1]\n", + " fips = parts[1]\n", + " scc = parts[2]\n", + " surrogate = strip(surrogate_part)\n", + "\n", + " country = \"USA\"\n", + " if length(fips) == 6\n", + " country = getCountry(fips[1:1])\n", + " fips = fips[2:end]\n", + " end\n", + "\n", + " if length(scc) == 8\n", + " scc = \"00\" * scc\n", + " end\n", + "\n", + " push!(data, (COUNTRY=country, FIPS=fips, SCC=scc, Surrogate=surrogate))\n", + " end\n", + " end\n", + " end\n", + " return data\n", + "end\n", + "\n", + "# getShapefilePath gets the shapefile path of a DataShapefile or WeightShapefile.\n", + "function getShapefilePath(baseDir::String, ShapefileName::String, CheckShapefiles::Bool)::String\n", + " if CheckShapefiles\n", + " for (root, dirs, files) in walkdir(baseDir)\n", + " for file in files\n", + " if file == ShapefileName * \".shp\"\n", + " return joinpath(root, file)\n", + " end\n", + " end\n", + " end\n", + " @warn \"Shapefile not found: $(joinpath(baseDir, ShapefileName * \".shp\"))\"\n", + " return \"\"\n", + " else\n", + " return joinpath(baseDir, ShapefileName * \".shp\")\n", + " end\n", + "end\n", + "\n", + "# validateShapefile validates a shapefile.\n", + "function validateShapefile(shapefilePath::String)\n", + " if shapefilePath != \"\"\n", + " try\n", + " shp = open(shapefilePath)\n", + " close(shp)\n", + " return true\n", + " catch e\n", + " @warn \"Failed to open shapefile: $shapefilePath; Error: $e\"\n", + " return false\n", + " end\n", + " end\n", + " return false\n", + "end\n", + "\n", + "# readSrgSpecSMOKE reads a SMOKE formatted spatial surrogate specification file.\n", + "# Results are returned as a map of surrogate specifications.\n", + "function readSrgSpecSMOKE(SrgSpec::String, SrgShapefileDirectory::String, CheckShapefiles::Bool=false)\n", + " \n", + " df = CSV.File(SrgSpec; comment=\"#\", header=1, delim=',', quotechar='\"', escapechar='\"') |> DataFrame\n", + "\n", + " srgs = SurrogateSpec[]\n", + "\n", + " for row in eachrow(df)\n", + " WeightColumns = String[]\n", + " WeightFactors = Float64[]\n", + " MergeNames = String[]\n", + " MergeMultipliers = Float64[]\n", + " Region = get(row, 1, \"\") |> strip_missing\n", + " Name = get(row, 2, \"\") |> strip_missing\n", + " Code = get(row, 3, 0)\n", + " DataShapefile = get(row, 4, \"\") |> strip_missing\n", + " DataAttribute = get(row, 5, \"\") |> strip_missing\n", + " WeightShapefile = get(row, 6, \"\") |> strip_missing\n", + " WeightAttribute = get(row, 7, \"\") |> strip_missing\n", + " WeightFunction = get(row, 8, \"\") |> strip_missing\n", + " FilterFunction = get(row, 9, \"\") |> strip_missing\n", + " MergeFunction = get(row, 10, \"\") |> strip_missing\n", + " BackupSurrogateNames = [\n", + " strip(coalesce(get(row, 11, missing), \"\")), \n", + " strip(coalesce(get(row, 12, missing), \"\")), \n", + " strip(coalesce(get(row, 13, missing), \"\"))\n", + " ]\n", + " BackupSurrogateNames = filter(s -> !isempty(s), BackupSurrogateNames)\n", + " Details = get(row, 14, \"\") |> strip_missing\n", + "\n", + " if WeightAttribute != \"none\" && WeightAttribute != \"\"\n", + " push!(WeightColumns, WeightAttribute)\n", + " push!(WeightFactors, 1.0)\n", + " end\n", + "\n", + " if WeightFunction != \"\"\n", + " WeightFunctionParts = split(WeightFunction, \"+\")\n", + " for wf in WeightFunctionParts\n", + " mulFunc = split(wf, \"*\")\n", + " if length(mulFunc) == 1\n", + " push!(WeightColumns, strip(mulFunc[1]))\n", + " push!(WeightFactors, 1.0)\n", + " elseif length(mulFunc) == 2\n", + " factor = tryparse(Float64, strip(mulFunc[1]))\n", + " if isnothing(factor)\n", + " error(\"Invalid weight factor in weight function: $wf\")\n", + " end\n", + " push!(WeightColumns, strip(mulFunc[2]))\n", + " push!(WeightFactors, factor)\n", + " else\n", + " error(\"Invalid weight function format: $wf\")\n", + " end\n", + " end\n", + " end\n", + "\n", + " if MergeFunction != \"none\" && MergeFunction != \"\"\n", + " parts = split(MergeFunction, \"+\")\n", + " for part in parts\n", + " components = split(part, \"*\")\n", + " if length(components) == 2\n", + " Name = strip(components[2])\n", + " val = tryparse(Float64, strip(components[1]))\n", + " if isnothing(val)\n", + " error(\"Invalid merge multiplier in merge function: $part\")\n", + " end\n", + " push!(MergeNames, Name)\n", + " push!(MergeMultipliers, val)\n", + " else\n", + " error(\"Invalid merge function format: $part\")\n", + " end\n", + " end\n", + " end\n", + "\n", + " if !isempty(DataShapefile)\n", + " DataShapefile = getShapefilePath(SrgShapefileDirectory, String(DataShapefile), CheckShapefiles)\n", + " isValidDataShapefile = validateShapefile(DataShapefile)\n", + " end\n", + " \n", + " if !isempty(WeightShapefile)\n", + " WeightShapefile = getShapefilePath(SrgShapefileDirectory, String(WeightShapefile), CheckShapefiles)\n", + " isValidWeightShapefile = validateShapefile(WeightShapefile)\n", + " end\n", + " \n", + " push!(srgs, SurrogateSpec(Region, Name, Code, DataShapefile, DataAttribute,\n", + " WeightShapefile, Details, BackupSurrogateNames,\n", + " WeightColumns, WeightFactors, FilterFunction,\n", + " MergeNames, MergeMultipliers))\n", + " \n", + " end\n", + " \n", + " return srgs\n", + "end\n", + "\n", + "# NewSpatialProcessor creates a new spatial processor.\n", + "function NewSpatialProcessor(srgSpecs::Vector{SurrogateSpec}, grids::GridDef, gridRef::DataFrame, inputSR::String, matchFullSCC::Bool)\n", + " sp = SpatialProcessor(\n", + " srgSpecs,\n", + " grids,\n", + " gridRef,\n", + " inputSR,\n", + " matchFullSCC,\n", + " 100,\n", + " 10\n", + " )\n", + " return sp\n", + "end\n", + "\n", + "# NewPolygon creates a new polygon from a string of coordinates.\n", + "function NewPolygon(polygon_str::String, inputSR::String, outputSR::String)\n", + " points = Point{2, Float64}[]\n", + " regex = r\"X:(-?\\d+(\\.\\d+)?(e[+\\-]?\\d+)?) Y:(-?\\d+(\\.\\d+)?(e[+\\-]?\\d+)?)\"\n", + " trans = Proj4.Transformation(inputSR, outputSR)\n", + " matches = collect(eachmatch(regex, polygon_str))\n", + " for match in matches\n", + " x1 = parse(Float64, match.captures[1])\n", + " y1 = parse(Float64, match.captures[4])\n", + " x, y = trans([x1, y1])\n", + " push!(points, Point(x, y))\n", + " end\n", + " return Polygon(points)\n", + "end\n", + "\n", + "\n", + "# NewGridIrregular creates a new irregular grid.\n", + "function NewGridIrregular(name::String, filepath::String, inputSR::String, outputSR::String)\n", + " gridCells = Vector{LibGEOS.Polygon}()\n", + " gridExtent = Vector{Vector{Tuple{Float64, Float64}}}()\n", + " regex = r\"X:(-?\\d+(\\.\\d+)?(e[+\\-]?\\d+)?) Y:(-?\\d+(\\.\\d+)?(e[+\\-]?\\d+)?)\"\n", + " trans = Proj4.Transformation(inputSR, outputSR)\n", + "\n", + " open(filepath, \"r\") do file\n", + " content = read(file, String)\n", + " polygon_strs = split(replace(content, '\\n' => \"\"), \"]] [[\")\n", + "\n", + " for polygon_str in polygon_strs\n", + " cleaned_str = replace(polygon_str, r\"[\\[\\]]\" => \"\")\n", + " matches = collect(eachmatch(regex, cleaned_str))\n", + "\n", + " wkt_points_str = join([\n", + " let (x1, y1) = (parse(Float64, match.captures[1]), parse(Float64, match.captures[4])),\n", + " (x, y) = trans([x1, y1])\n", + " in\n", + " \"$(x) $(y)\"\n", + " end for match in matches\n", + " ], \", \")\n", + "\n", + " polygon_wkt = \"POLYGON(($wkt_points_str))\"\n", + " polygon = LibGEOS.readgeom(polygon_wkt)\n", + " push!(gridCells, polygon)\n", + "\n", + " polygonCoords = [Tuple(trans([parse(Float64, match.captures[1]), parse(Float64, match.captures[4])])) for match in matches]\n", + " push!(gridExtent, polygonCoords)\n", + " end\n", + " end\n", + " \n", + " Nx = 1\n", + " Ny = length(gridCells)\n", + " \n", + " return GridDef(name, Nx, Ny, outputSR, gridCells, gridExtent)\n", + "end\n", + "\n", + "\n", + "# setupSpatialProcessor reads in the necessary information to initialize\n", + "# a processor for spatializing emissions, and then does so.\n", + "function setupSpatialProcessor(c::Config)\n", + "\n", + " srgSpecs = readSrgSpecSMOKE(c.SrgSpec, c.SrgShapefileDirectory, true)\n", + "\n", + " gridRef = DataFrame(COUNTRY=String[], FIPS=String[], SCC=String[], Surrogate=String[])\n", + " for f in c.f_gridRef\n", + " gridRefTemp = read_grid(f)\n", + " append!(gridRef, gridRefTemp)\n", + " end\n", + "\n", + " grids = NewGridIrregular(c.GridName, c.GridFile, c.OutputSR, c.OutputSR)\n", + "\n", + " sp = NewSpatialProcessor(srgSpecs, grids, gridRef, c.InputSR, false)\n", + "\n", + " return sp, grids\n", + "end\n", + "\n", + "# findCountyPolygon finds county polygon based on location and counties shapefile.\n", + "function findCountyPolygon(loc::String, countiesShapefile::String)\n", + " fips_match = match(r\"\\d+\", loc)\n", + " fips_code = fips_match.match\n", + "\n", + " prj_path = replace(countiesShapefile, \".shp\" => \".prj\")\n", + " tgt_crs = open(prj_path, \"r\") do file\n", + " read(file, String)\n", + " end\n", + " dbf_path = replace(countiesShapefile, \".shp\" => \".dbf\")\n", + " shapes = Shapefile.Table(countiesShapefile)\n", + " dbf_data = DBFTables.Table(dbf_path)\n", + " attributes = DataFrame(dbf_data)\n", + "\n", + " for (index, shape) in enumerate(shapes)\n", + " geoid = string(attributes[index, :GEOID])\n", + "\n", + " if geoid == fips_code\n", + " countyPolygon = GeoInterface.convert(LibGEOS, shape.geometry)\n", + " return countyPolygon, tgt_crs\n", + " end\n", + " end\n", + "\n", + " return nothing, nothing\n", + "end\n", + "\n", + "\n", + "# GetIndex returns the row and column indices for a county or point in the grid.\n", + "function GetIndex(grid::GridDef, gridCRS::String, emissionType::String; loc::String = \"\", countiesShapefile::String = \"\", x::Float64 = NaN, y::Float64 = NaN)\n", + " rows = Int[]\n", + " cols = Int[]\n", + " fracs = Float64[]\n", + " inGrid = false\n", + " coveredByGrid = false\n", + "\n", + " if emissionType == \"point\"\n", + " if isnan(x) || isnan(y)\n", + " println(\"For point emissions, x and y coordinates must be provided\")\n", + " return nothing\n", + " end\n", + "\n", + " point_wkt = \"POINT($x $y)\"\n", + " point = LibGEOS.readgeom(point_wkt)\n", + " pointInCells = Dict()\n", + "\n", + " for (index, ext) in enumerate(grid.Extent)\n", + " coords_str = join([\"$(coord[1]) $(coord[2])\" for coord in ext], \", \")\n", + " cell_wkt = \"POLYGON(($coords_str))\"\n", + " cellPolygon = LibGEOS.readgeom(cell_wkt)\n", + "\n", + " if LibGEOS.intersects(point, cellPolygon)\n", + " push!(pointInCells, index => cellPolygon)\n", + " end\n", + " end\n", + "\n", + " numIntersectingCells = length(pointInCells)\n", + " if numIntersectingCells > 0\n", + " inGrid = true\n", + " fraction = 1.0 / numIntersectingCells\n", + " for (idx, _) in pointInCells\n", + " push!(rows, idx)\n", + " push!(cols, 1)\n", + " push!(fracs, fraction)\n", + " end\n", + " end\n", + "\n", + " coveredByGrid = true\n", + " \n", + " elseif emissionType == \"area\"\n", + " countyPolygon, tgtCRS = findCountyPolygon(loc, countiesShapefile)\n", + " if countyPolygon === nothing || tgtCRS === nothing\n", + " println(\"Could not find Polygon or target CRS for location: $loc\")\n", + " return nothing\n", + " end\n", + " transformer = Proj4.Transformation(gridCRS, tgtCRS)\n", + " totalArea = LibGEOS.area(countyPolygon)\n", + "\n", + " for (index, ext) in enumerate(grid.Extent)\n", + " transformed_coords = [transformer(coord) for coord in ext]\n", + " transformed_coords_str = join([\"$(x) $(y)\" for (x, y) in transformed_coords], \", \")\n", + " cell_wkt = \"POLYGON(($transformed_coords_str))\"\n", + " cellPolygon = LibGEOS.readgeom(cell_wkt)\n", + "\n", + " if LibGEOS.intersects(countyPolygon, cellPolygon)\n", + " intersection = LibGEOS.intersection(countyPolygon, cellPolygon)\n", + " cellArea = LibGEOS.area(intersection)\n", + " push!(fracs, cellArea / totalArea)\n", + " push!(rows, index)\n", + " push!(cols, 1)\n", + " end\n", + " end\n", + "\n", + " if length(rows) > 0\n", + " inGrid = true\n", + " end\n", + " if sum(fracs) > 0.9999\n", + " coveredByGrid = true\n", + " end\n", + "\n", + " else\n", + " println(\"Invalid emission type: $emissionType\")\n", + " return nothing\n", + " end\n", + "\n", + " return IndexInfo(rows, cols, fracs, inGrid, coveredByGrid)\n", + "end\n", + "\n", + "\n", + "# recordToGrid allocates the reciever to the specifed grid without any spatial surrogate.\n", + "function recordToGrid(grid::GridDef, loc::String, countiesShapefile::String, srcCRS::String)\n", + " rows, cols, fracs, inGrid, coveredByGrid = GetIndex(grid, loc, countiesShapefile, srcCRS)\n", + " \n", + " gridSrg = spzeros(Float64, grid.Ny, grid.Nx)\n", + " \n", + " if inGrid\n", + " for i in eachindex(rows)\n", + " gridSrg[rows[i], cols[i]] += fracs[i]\n", + " end\n", + " end\n", + " \n", + " return gridSrg, coveredByGrid, inGrid\n", + "end\n", + "\n", + "# GridFactors returns the normalized fractions of emissions in each grid cell.\n", + "function GridFactors(record::DataFrameRow, sp::SpatialProcessor, countiesShapefile::String, srcCRS::String)\n", + " loc = record.COUNTRY == \"USA\" ? \"US\" * record.FIPS : record.COUNTRY * record.FIPS\n", + " \n", + " return recordToGrid(sp.Grids, loc, countiesShapefile, srcCRS)\n", + "end\n", + "\n", + "# uniqueCoordinates gets unique coordinates from records.\n", + "function uniqueCoordinates(recs::Dict{String, Vector{DataFrameRow}})\n", + " uCoords = Set{Tuple{Float64, Float64}}()\n", + "\n", + " for (scc, rows) in recs\n", + " for row in rows\n", + " if haskey(row, :LONGITUDE) && haskey(row, :LATITUDE)\n", + " lon_lat = (row.LONGITUDE, row.LATITUDE)\n", + " push!(uCoords, lon_lat)\n", + " end\n", + " end\n", + " end\n", + "\n", + " return uCoords\n", + "end\n", + "\n", + "# uniqueLoc gets unique locations (Country+FIPS) from records.\n", + "function uniqueLoc(recs::Dict{String, Vector{DataFrameRow}})\n", + " uLocs = Set{String}()\n", + "\n", + " for (scc, rows) in records\n", + " for row in rows\n", + " country_fips = row.COUNTRY * row.FIPS \n", + " push!(uLocs, country_fips)\n", + " end\n", + " end\n", + "\n", + " return uLocs\n", + "end\n", + "\n", + "# writeEmis writes emissions data to a shapefile.\n", + "function writeEmis(filename::String, gd::GridDef, records::Dict{String, Vector{DataFrameRow}}, indexDict::Dict{Union{Tuple{Float64, Float64}, String}, Union{Nothing, IndexInfo}}, InMAP_crs::String, emissionType::String) \n", + " df = DataFrame(\n", + " cellIndex = Int[],\n", + " VOC = Union{Float64, Missing}[],\n", + " NOX = Union{Float64, Missing}[],\n", + " NH3 = Union{Float64, Missing}[],\n", + " SO2 = Union{Float64, Missing}[],\n", + " PM25 = Union{Float64, Missing}[],\n", + " SCC = String[]\n", + " )\n", + "\n", + " scc_index = Dict()\n", + "\n", + " for (scc, scc_records) in records\n", + " for record in scc_records\n", + " pol = Symbol(record.POLID)\n", + " ann_value = record.ANN_VALUE\n", + " locIdx = nothing\n", + " \n", + " if emissionType == \"point\"\n", + " if haskey(record, :LONGITUDE) && haskey(record, :LATITUDE)\n", + " longitude, latitude = record.LONGITUDE, record.LATITUDE\n", + " locIdx = indexDict[(longitude, latitude)]\n", + " end\n", + " elseif emissionType == \"area\"\n", + " country = record.COUNTRY == \"USA\" ? \"US\" : record.COUNTRY\n", + " loc = country * record.FIPS\n", + " locIdx = indexDict[loc]\n", + " end\n", + "\n", + " if locIdx !== nothing\n", + " for (idx, fraction) in zip(locIdx.rows, locIdx.fracs)\n", + " emission_share = ustrip(ann_value * fraction)\n", + " key = (idx, scc)\n", + "\n", + " if haskey(scc_index, key)\n", + " row_index = scc_index[key]\n", + " current_value = ismissing(df[row_index, pol]) ? 0.0 : df[row_index, pol]\n", + " df[row_index, pol] = current_value + emission_share\n", + " else\n", + " new_row = DataFrame(\n", + " cellIndex = [idx],\n", + " VOC = missing, NOX = missing, NH3 = missing,\n", + " SO2 = missing, PM25 = missing,\n", + " SCC = [scc]\n", + " )\n", + " new_row[!, pol] = [emission_share]\n", + " append!(df, new_row)\n", + " scc_index[key] = size(df, 1)\n", + " end\n", + " end\n", + " end\n", + " end\n", + " end\n", + "\n", + " function format_float(value)\n", + " if ismissing(value)\n", + " return \"0.0\"\n", + " else\n", + " return @sprintf(\"%.10e\", value)\n", + " end\n", + " end\n", + "\n", + " polygons = Vector{LibGEOS.Polygon}()\n", + " attributes = DataFrame(\n", + " cellIndex = Int[],\n", + " VOC = String[], NOX = String[], NH3 = String[],\n", + " SO2 = String[], PM25 = String[], SCC = String[]\n", + " )\n", + "\n", + " for row in eachrow(df)\n", + " if row.cellIndex in keys(gd.Cells)\n", + " push!(polygons, gd.Cells[row.cellIndex])\n", + " push!(attributes, (\n", + " cellIndex = row.cellIndex,\n", + " VOC = format_float(row.VOC),\n", + " NOX = format_float(row.NOX),\n", + " NH3 = format_float(row.NH3),\n", + " SO2 = format_float(row.SO2),\n", + " PM25 = format_float(row.PM25),\n", + " SCC = row.SCC\n", + " ))\n", + " end\n", + " end\n", + "\n", + " writer = Shapefile.Writer(polygons, attributes)\n", + " Shapefile.write(filename, writer, force=true)\n", + "\n", + " prj_filename = replace(filename, r\"\\.shp$\" => \"\") * \".prj\"\n", + " open(prj_filename, \"w\") do f\n", + " write(f, InMAP_crs)\n", + " end\n", + "end\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configuration\n", + "\n", + "Next, we need to specify which files we are going to load and how we are going to load them.\n", + "There are several different file formats, and we need to specify what format each of the files is in.\n", + "For now, we are focusing on the files that contain annual emissions (rather than daily or hourly emissions\n", + "or activity data)." + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "readpoint(f) = FF10PointDataFrame(CSV.read(joinpath(f), DataFrame, comment=\"#\"))\n", + "readnonpoint(f) = FF10NonPointDataFrame(CSV.read(joinpath(f), DataFrame, comment=\"#\"))\n", + "readnonroad(f) = FF10NonRoadDataFrame(CSV.read(joinpath(f), DataFrame, comment=\"#\"))\n", + "readonroad(f) = FF10OnRoadDataFrame(CSV.read(joinpath(f), DataFrame, comment=\"#\"))\n", + "\n", + "emisdirs = (\n", + " \"2019ge_cb6_19k/inputs/afdust\" => (\n", + " \"2019ge_from_afdust_2017NEIpost_NONPOINT_20210129_04nov2021_v0.csv\" => readnonpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/airports\" => (\n", + " \"2019ge_from_airports_SmokeFlatFile_2019NEI_POINT_20210914_runway_apportion_25jan2022_nf_v1.csv\" => readpoint,\n", + " ),\n", + " # Format is FF10_POINT although first comment is: #FORMAT=FF10_NONPOINT\n", + " \"2019ge_cb6_19k/inputs/canada_ag\" => (\n", + " \"canada_MYR_2016_ag_animal_NH3_VOC_monthly_12km_aggregated_09jun2021_v0.csv\" => readpoint,\n", + " \"canada_MYR_2016_ag_fertilizer_NH3_monthly_12km_16apr2021_v0.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/canada_og2D\" => (\n", + " \"canada_MYR_2016_point_UOG_15jun2021_nf_v1.csv\" => readpoint,\n", + " ),\n", + " #\"2019ge_cb6_19k/inputs/cem\" => ( \n", + " #),\n", + " \"2019ge_cb6_19k/inputs/cmv_c1c2_12\" => (\n", + " \"cmv_c1c2_2019_12US1_2019_CA_annual_11feb2022_v0.csv\" => readpoint,\n", + " \"cmv_c1c2_2019_12US1_2019_MX_annual_11feb2022_v0.csv\" => readpoint,\n", + " \"cmv_c1c2_2019_12US1_2019_US_annual_11feb2022_v0.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/cmv_c3_12\" => (\n", + " \"cmv_c3_2019_masked_12US1_2019_CA_annual_10feb2022_v0.csv\" => readpoint,\n", + " \"cmv_c3_2019_masked_12US1_2019_MX_annual_10feb2022_v0.csv\" => readpoint,\n", + " \"cmv_c3_2019_masked_12US1_2019_US_annual_10feb2022_v0.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/fertilizer\" => (\n", + " \"2019ge_fertilizer_NH3_monthly_23jun2022_v0.csv\" => readnonpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/livestock\" => (\n", + " \"2019ge_from_livestock_2017NEIpost_NONPOINT_20210129_29nov2021_nf_v1.csv\" => readnonpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/nonpt\" => (\n", + " \"2017NEIpost_NONPOINT_20210129_08oct2021_v2.csv\" => readnonpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/nonroad\" => (\n", + " \"2019_interpolation_texas_nonroad_29oct2021_v1.csv\" => readnonroad,\n", + " \"2019ge_from_2017nei_nonroad_california_29oct2021_v0.csv\" => readnonroad,\n", + " \"2019ge_nonroad_from_MOVES_aggSCC_29oct2021_v1.csv\" => readnonroad, \n", + " ),\n", + " \"2019ge_cb6_19k/inputs/np_oilgas\" => (\n", + " \"np_oilgas_2019ge_02mar2022_nf_v3.csv\" => readnonpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/np_solvents\" => (\n", + " \"nonVCPy_solvents_2017NEIpost_NONPOINT_20210129_15feb2022_nf_v1.csv\" => readnonpoint,\n", + " \"solvents_2017NEIpost_NONPOINT_20210129_nonVCPy_polls_for2016v3_2019ge_15feb2022_v0.csv\" => readnonpoint,\n", + " \"VCPy_solvents_2019ge_minuspoint_21feb2022_nf_v2.csv\" => readnonpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/onroad\" => (\n", + " \"2019ge_SMOKE_MOVES_MOVES3_AQ_fullHAP_30nov2021_v0.csv\" => readonroad,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/onroad_can\" => (\n", + " \"canada_2019ge_projection_T3_onroad_monthly_14mar2022_v0.csv\" => readonroad,\n", + " \"canada_2019ge_projection_T3_onroad_refueling_14mar2022_v0.csv\" => readonroad,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/onroad_mex\" => (\n", + " \"Mexico_2019_onroad_MOVES_interpolated_22oct2021_v0.csv\" => readonroad,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/othafdust\" => (\n", + " \"canada_MYR_2016_area_dust_27dec2020_v0.csv\" => readnonpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/othar\" => (\n", + " \"2019ge_proj_CEDS_from_Mexico_2016INEM_nonpoint_09nov2021_v0.csv\" => readnonpoint,\n", + " \"2019ge_proj_CEDS_from_Mexico_2016INEM_nonroad_09nov2021_v0.csv\" => readnonroad,\n", + " \"canada_2019ge_proj_from2016_T4_nonroad_monthly_15mar2022_v0.csv\" => readnonroad,\n", + " \"canada_MYR_2016_area_other_27dec2020_v0.csv\" => readnonpoint,\n", + " \"canada_MYR_2016_T5_rail_27dec2020_v0.csv\" => readnonpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/othpt\" => (\n", + " \"2019ge_proj_CEDS_from_Mexico_2016_point_20191209_06mar2023_nf_v3.csv\" => readpoint,\n", + " \"canada_MYR_2016_point_CB6VOC_monthly_27dec2020_v0.csv\" => readpoint,\n", + " \"canada_MYR_2016_point_nodust_noVOC_monthly_27dec2020_v0.csv\" => readpoint,\n", + " \"canada_MYR_2016_point_UOG_15jun2021_nf_v2.csv\" => readpoint,\n", + " \"canada_MYR_2016_point_VOC_INV_monthly_27dec2020_v0.csv\" => readpoint,\n", + " \"canada_MYR_2016_T1_airports_monthly_27dec2020_v0.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/othptdust\" => (\n", + " \"canada_MYR_2016_ag_animal_PM10_monthly_12km_27dec2020_v0.csv\" => readpoint,\n", + " \"canada_MYR_2016_ag_animal_PM25_monthly_12km_27dec2020_v0.csv\" => readpoint,\n", + " \"canada_MYR_2016_ag_harvest_monthly_12km_27dec2020_v0.csv\" => readpoint,\n", + " \"canada_MYR_2016_ag_tillage_monthly_12km_27dec2020_v0.csv\" => readpoint,\n", + " \"canada_MYR_2016_point_source_dust_monthly_27dec2020_v0.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/pt_oilgas\" => (\n", + " \"2019ge_from_oilgas_2019NEI_SmokeFlatFile_POINT_20220325_calcyear2017_07apr2022_v0.csv\" => readpoint,\n", + " \"oilgas_2019NEI_SmokeFlatFile_POINT_20220325_calcyear2019_06apr2022_v0.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/ptagfire\" => (\n", + " \"ptinv_agfire_CONUS_2019ge_23dec2021_v0.csv\" => readpoint,\n", + " \"ptinv_agfire_FL_2019ge_23dec2021_v0.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/ptegu\" => (\n", + " \"egucems_2019NEI_SmokeFlatFile_POINT_20220325_07apr2022_v0.csv\" => readpoint,\n", + " \"egunoncems_2019NEI_SmokeFlatFile_POINT_20220325_08apr2022_nf_v1.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/ptfire_othna\" => (\n", + " \"ptinv_finn_CA_finn_2019ge_ff10_26oct2021_v0.csv\" => readpoint,\n", + " \"ptinv_finn_MX_finn_2019ge_ff10_26oct2021_v0.csv\" => readpoint,\n", + " \"ptinv_finn_ONA_finn_2019ge_ff10_26oct2021_v0.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/ptfire-rx\" => (\n", + " \"ptinv_ptfire_FH_grassland_2019ge_23dec2021_v0.csv\" => readpoint,\n", + " \"ptinv_ptfire_sf2_2019ge_bsp_16dec2021_nf_v1.csv\" => readpoint,\n", + " \"ptinv_ptfire_sf2_2019ge_bsp_haps_16dec2021_nf_v1.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/ptfire-wild\" => (\n", + " \"ptinv_ptfire_sf2_2019ge_bsp_16dec2021_nf_v2.csv\" => readpoint,\n", + " \"ptinv_ptfire_sf2_2019ge_bsp_haps_16dec2021_nf_v2.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/ptnonipm\" => (\n", + " \"2019_eto_ptnonipm_supplemental_07apr2022_nf_v1.csv\" => readpoint,\n", + " \"nonegu_2019NEI_SmokeFlatFile_POINT_20220325_07apr2022_nf_v1.csv\" => readpoint,\n", + " \"railyard_postprojection_2019NEI_SmokeFlatFile_POINT_20211221_05apr2022_nf_v1.csv\" => readpoint,\n", + " ),\n", + " \"2019ge_cb6_19k/inputs/rail\" => (\n", + " \"2019ge_from_rail_2017NEIpost_NONPOINT_20210129_04jan2022_v0.csv\" => readnonpoint,\n", + " ),\n", + " # CONUS Only\n", + " \"2019ge_cb6_19k/inputs/rwc\" => (\n", + " \"rwc_2017NEIpost_NONPOINT_20210129_06apr2022_nf_v6.csv\" => readnonpoint,\n", + " ), \n", + ");" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# ptegu - for testing\n", + "readpoint(f) = FF10PointDataFrame(CSV.read(joinpath(f), DataFrame, comment=\"#\"))\n", + "readnonpoint(f) = FF10NonPointDataFrame(CSV.read(joinpath(f), DataFrame, comment=\"#\"))\n", + "readnonroad(f) = FF10NonRoadDataFrame(CSV.read(joinpath(f), DataFrame, comment=\"#\"))\n", + "readonroad(f) = FF10OnRoadDataFrame(CSV.read(joinpath(f), DataFrame, comment=\"#\"))\n", + "\n", + "emisdirs = (\n", + " \n", + " \"2019ge_cb6_19k/inputs/ptegu\" => (\n", + " \"egucems_2019NEI_SmokeFlatFile_POINT_20220325_07apr2022_v0.csv\" => readpoint,\n", + " \"egunoncems_2019NEI_SmokeFlatFile_POINT_20220325_08apr2022_nf_v1.csv\" => readpoint,\n", + " ), \n", + ");" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load emissions\n", + "\n", + "Now we iterate through each of the files listed above and read them into dataframes. Each file contains a lot of data; however, we are only interested in the pollutant name, country name, FIPS, and the Source Classification Code (SCC) that describes the type of emitter, as well as the annual emissions rate, longitude (for point sources), and latitude (for point sources). Therefore, for now, we will only save these variables." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = []\n", + "\n", + "for (emisdir, data) ∈ emisdirs\n", + " for (file, readfunc) ∈ data\n", + " path = joinpath(dir, emisdir, file)\n", + " df = readfunc(path)\n", + " \n", + " if readfunc === readpoint\n", + " push!(dfs, df.df[!, [:POLID, :COUNTRY, :FIPS, :SCC, :ANN_VALUE, :LONGITUDE, :LATITUDE]])\n", + " elseif readfunc === readnonpoint || readfunc === readnonroad || readfunc === readonroad\n", + " area_df = df.df[!, [:POLID, :COUNTRY, :FIPS, :SCC, :ANN_VALUE]]\n", + " area_df[!, :LONGITUDE] = missing\n", + " area_df[!, :LATITUDE] = missing\n", + " push!(dfs, area_df)\n", + " else\n", + " println(\"Unknown emission type\")\n", + " end\n", + " end\n", + "end\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Results\n", + "\n", + "Next, let's combine all the files together into one big dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
207053×7 DataFrame
207028 rows omitted
RowPOLIDCOUNTRYFIPSSCCANN_VALUELONGITUDELATITUDE
String15String3StringStringQuantity…Float64Float64
1PM25-PRIUS2502700201002010.000260206 kg s^-1-72.015142.1124
2PM10-PRIUS2502700201002010.000260206 kg s^-1-72.015142.1124
3PM10-PRIUS2104900201001013.12612e-7 kg s^-1-84.102537.8824
4PM25-PRIUS2104900201001012.84943e-7 kg s^-1-84.102537.8824
5PM10-PRIUS2104900201001014.39207e-7 kg s^-1-84.102537.8824
6PM25-PRIUS2104900201001014.00333e-7 kg s^-1-84.102537.8824
7PM25-PRIUS1809700102006017.76502e-5 kg s^-1-86.166539.7629
8PM10-PRIUS1809700102006017.76502e-5 kg s^-1-86.166539.7629
9PM25-PRIUS2712300201002015.92593e-5 kg s^-1-93.111644.9315
10PM10-PRIUS2712300201002018.13521e-5 kg s^-1-93.111644.9315
1118540299US0402700101006042.58008e-9 kg s^-1-114.71132.7214
1216065831US0402700101006046.19202e-8 kg s^-1-114.71132.7214
13PM25-PRIUS0112900201002012.87666e-5 kg s^-1-88.030831.2539
207042OCUS0226100201001024.07002e-5 kg s^-1-145.52962.1186
207043PMFINEUS0226100201001021.13764e-5 kg s^-1-145.52962.1186
207044SO4US0226100201001026.83746e-7 kg s^-1-145.52962.1186
207045ECUS0226100201001020.000441994 kg s^-1-145.52962.1186
207046NO3US0226100201001026.53935e-7 kg s^-1-145.52962.1186
207047OCUS0226100201001020.000100641 kg s^-1-145.52962.1186
207048PMFINEUS0226100201001022.81307e-5 kg s^-1-145.52962.1186
207049SO4US0226100201001021.69072e-6 kg s^-1-145.52962.1186
207050ECUS0226100103004033.80295e-10 kg s^-1-145.52962.1186
207051OCUS0226100103004033.80295e-10 kg s^-1-145.52962.1186
207052PMFINEUS0226100103004032.05267e-8 kg s^-1-145.52962.1186
207053SO4US0226100103004031.67267e-8 kg s^-1-145.52962.1186
" + ], + "text/latex": [ + "\\begin{tabular}{r|ccccccc}\n", + "\t& POLID & COUNTRY & FIPS & SCC & ANN\\_VALUE & LONGITUDE & LATITUDE\\\\\n", + "\t\\hline\n", + "\t& String15 & String3 & String & String & Quantity… & Float64 & Float64\\\\\n", + "\t\\hline\n", + "\t1 & PM25-PRI & US & 25027 & 0020100201 & 0.000260206 kg s\\^-1 & -72.0151 & 42.1124 \\\\\n", + "\t2 & PM10-PRI & US & 25027 & 0020100201 & 0.000260206 kg s\\^-1 & -72.0151 & 42.1124 \\\\\n", + "\t3 & PM10-PRI & US & 21049 & 0020100101 & 3.12612e-7 kg s\\^-1 & -84.1025 & 37.8824 \\\\\n", + "\t4 & PM25-PRI & US & 21049 & 0020100101 & 2.84943e-7 kg s\\^-1 & -84.1025 & 37.8824 \\\\\n", + "\t5 & PM10-PRI & US & 21049 & 0020100101 & 4.39207e-7 kg s\\^-1 & -84.1025 & 37.8824 \\\\\n", + "\t6 & PM25-PRI & US & 21049 & 0020100101 & 4.00333e-7 kg s\\^-1 & -84.1025 & 37.8824 \\\\\n", + "\t7 & PM25-PRI & US & 18097 & 0010200601 & 7.76502e-5 kg s\\^-1 & -86.1665 & 39.7629 \\\\\n", + "\t8 & PM10-PRI & US & 18097 & 0010200601 & 7.76502e-5 kg s\\^-1 & -86.1665 & 39.7629 \\\\\n", + "\t9 & PM25-PRI & US & 27123 & 0020100201 & 5.92593e-5 kg s\\^-1 & -93.1116 & 44.9315 \\\\\n", + "\t10 & PM10-PRI & US & 27123 & 0020100201 & 8.13521e-5 kg s\\^-1 & -93.1116 & 44.9315 \\\\\n", + "\t11 & 18540299 & US & 04027 & 0010100604 & 2.58008e-9 kg s\\^-1 & -114.711 & 32.7214 \\\\\n", + "\t12 & 16065831 & US & 04027 & 0010100604 & 6.19202e-8 kg s\\^-1 & -114.711 & 32.7214 \\\\\n", + "\t13 & PM25-PRI & US & 01129 & 0020100201 & 2.87666e-5 kg s\\^-1 & -88.0308 & 31.2539 \\\\\n", + "\t14 & PM10-PRI & US & 01129 & 0020100201 & 2.87666e-5 kg s\\^-1 & -88.0308 & 31.2539 \\\\\n", + "\t15 & 16065831 & US & 01129 & 0020100201 & 8.00863e-9 kg s\\^-1 & -88.0308 & 31.2539 \\\\\n", + "\t16 & 18540299 & US & 01129 & 0020100201 & 3.33693e-10 kg s\\^-1 & -88.0308 & 31.2539 \\\\\n", + "\t17 & PM10-PRI & US & 21157 & 0010200601 & 5.16723e-5 kg s\\^-1 & -88.3534 & 37.0481 \\\\\n", + "\t18 & PM25-PRI & US & 21157 & 0010200601 & 5.16723e-5 kg s\\^-1 & -88.3534 & 37.0481 \\\\\n", + "\t19 & PM10-PRI & US & 21157 & 0010200601 & 5.33911e-5 kg s\\^-1 & -88.3534 & 37.0481 \\\\\n", + "\t20 & PM25-PRI & US & 21157 & 0010200601 & 5.33911e-5 kg s\\^-1 & -88.3534 & 37.0481 \\\\\n", + "\t21 & 16065831 & US & 56037 & 0010100501 & 4.71773e-10 kg s\\^-1 & -108.782 & 41.738 \\\\\n", + "\t22 & 18540299 & US & 56037 & 0010100501 & 1.0356e-10 kg s\\^-1 & -108.782 & 41.738 \\\\\n", + "\t23 & 1330207 & US & 48277 & 0020100201 & 1.76915e-6 kg s\\^-1 & -95.5575 & 33.6969 \\\\\n", + "\t24 & SO2 & US & 48277 & 0020100201 & 1.7447e-5 kg s\\^-1 & -95.5575 & 33.6969 \\\\\n", + "\t25 & 100414 & US & 48277 & 0020100201 & 8.83136e-7 kg s\\^-1 & -95.5575 & 33.6969 \\\\\n", + "\t26 & PM25-PRI & US & 48277 & 0020100201 & 0.000218566 kg s\\^-1 & -95.5575 & 33.6969 \\\\\n", + "\t27 & 130498292 & US & 48277 & 0020100201 & 6.041e-8 kg s\\^-1 & -95.5575 & 33.6969 \\\\\n", + "\t28 & PM10-PRI & US & 48277 & 0020100201 & 0.000220703 kg s\\^-1 & -95.5575 & 33.6969 \\\\\n", + "\t29 & 108883 & US & 48277 & 0020100201 & 3.59295e-6 kg s\\^-1 & -95.5575 & 33.6969 \\\\\n", + "\t30 & 71432 & US & 48277 & 0020100201 & 3.30816e-7 kg s\\^-1 & -95.5575 & 33.6969 \\\\\n", + "\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m207053×7 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m POLID \u001b[0m\u001b[1m COUNTRY \u001b[0m\u001b[1m FIPS \u001b[0m\u001b[1m SCC \u001b[0m\u001b[1m ANN_VALUE \u001b[0m\u001b[1m LONGITUD\u001b[0m ⋯\n", + " │\u001b[90m String15 \u001b[0m\u001b[90m String3 \u001b[0m\u001b[90m String \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Quantity… \u001b[0m\u001b[90m Float64 \u001b[0m ⋯\n", + "────────┼───────────────────────────────────────────────────────────────────────\n", + " 1 │ PM25-PRI US 25027 0020100201 0.000260206 kg s^-1 -72.015 ⋯\n", + " 2 │ PM10-PRI US 25027 0020100201 0.000260206 kg s^-1 -72.015\n", + " 3 │ PM10-PRI US 21049 0020100101 3.12612e-7 kg s^-1 -84.102\n", + " 4 │ PM25-PRI US 21049 0020100101 2.84943e-7 kg s^-1 -84.102\n", + " 5 │ PM10-PRI US 21049 0020100101 4.39207e-7 kg s^-1 -84.102 ⋯\n", + " 6 │ PM25-PRI US 21049 0020100101 4.00333e-7 kg s^-1 -84.102\n", + " 7 │ PM25-PRI US 18097 0010200601 7.76502e-5 kg s^-1 -86.166\n", + " 8 │ PM10-PRI US 18097 0010200601 7.76502e-5 kg s^-1 -86.166\n", + " 9 │ PM25-PRI US 27123 0020100201 5.92593e-5 kg s^-1 -93.111 ⋯\n", + " 10 │ PM10-PRI US 27123 0020100201 8.13521e-5 kg s^-1 -93.111\n", + " 11 │ 18540299 US 04027 0010100604 2.58008e-9 kg s^-1 -114.711\n", + " ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱\n", + " 207044 │ SO4 US 02261 0020100102 6.83746e-7 kg s^-1 -145.529\n", + " 207045 │ EC US 02261 0020100102 0.000441994 kg s^-1 -145.529 ⋯\n", + " 207046 │ NO3 US 02261 0020100102 6.53935e-7 kg s^-1 -145.529\n", + " 207047 │ OC US 02261 0020100102 0.000100641 kg s^-1 -145.529\n", + " 207048 │ PMFINE US 02261 0020100102 2.81307e-5 kg s^-1 -145.529\n", + " 207049 │ SO4 US 02261 0020100102 1.69072e-6 kg s^-1 -145.529 ⋯\n", + " 207050 │ EC US 02261 0010300403 3.80295e-10 kg s^-1 -145.529\n", + " 207051 │ OC US 02261 0010300403 3.80295e-10 kg s^-1 -145.529\n", + " 207052 │ PMFINE US 02261 0010300403 2.05267e-8 kg s^-1 -145.529\n", + " 207053 │ SO4 US 02261 0010300403 1.67267e-8 kg s^-1 -145.529 ⋯\n", + "\u001b[36m 2 columns and 207032 rows omitted\u001b[0m" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "emis = vcat(dfs...)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A lot of the records above are duplicates; let's combine them all. " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
119725×7 DataFrame
119700 rows omitted
RowPOLIDCOUNTRYFIPSSCCLONGITUDELATITUDEANN_VALUE
String15String3StringStringFloat64Float64Quantity…
1CO2US181730010100202-87.332937.914795.6968 kg s^-1
2CO2US181730010100203-87.333337.914551.9336 kg s^-1
3CO2US060290020100201-119.4735.280243.1176 kg s^-1
4CO2US180890020100201-87.477541.673941.6498 kg s^-1
5CO2US481670020200203-94.933229.377736.5314 kg s^-1
6CO2US470750020100201-89.392535.649935.107 kg s^-1
7CO2US481670020200203-94.932229.377734.9882 kg s^-1
8CO2US481670020200203-94.932729.377729.4392 kg s^-1
9CO2US330070010100903-71.175244.471225.6217 kg s^-1
10CO2US482030020200203-94.690232.44821.6134 kg s^-1
11CO2US482030020200203-94.690232.447721.4005 kg s^-1
12CO2US390350020100201-81.714241.473820.8558 kg s^-1
13CO2US060770010100902-121.3337.943817.2726 kg s^-1
1197147439976US360050020200102-73.914240.7980.0 kg s^-1
11971583329US360050020200102-73.914240.7980.0 kg s^-1
119716120127US360050020200102-73.909140.79870.0 kg s^-1
119717191242US360050020200102-73.909140.79870.0 kg s^-1
119718193395US360050020200102-73.909140.79870.0 kg s^-1
119719205992US360050020200102-73.909140.79870.0 kg s^-1
119720218019US360050020200102-73.909140.79870.0 kg s^-1
11972150328US360050020200102-73.909140.79870.0 kg s^-1
11972253703US360050020200102-73.909140.79870.0 kg s^-1
11972356553US360050020200102-73.909140.79870.0 kg s^-1
1197247439976US360050020200102-73.909140.79870.0 kg s^-1
11972583329US360050020200102-73.909140.79870.0 kg s^-1
" + ], + "text/latex": [ + "\\begin{tabular}{r|ccccccc}\n", + "\t& POLID & COUNTRY & FIPS & SCC & LONGITUDE & LATITUDE & ANN\\_VALUE\\\\\n", + "\t\\hline\n", + "\t& String15 & String3 & String & String & Float64 & Float64 & Quantity…\\\\\n", + "\t\\hline\n", + "\t1 & CO2 & US & 18173 & 0010100202 & -87.3329 & 37.9147 & 95.6968 kg s\\^-1 \\\\\n", + "\t2 & CO2 & US & 18173 & 0010100203 & -87.3333 & 37.9145 & 51.9336 kg s\\^-1 \\\\\n", + "\t3 & CO2 & US & 06029 & 0020100201 & -119.47 & 35.2802 & 43.1176 kg s\\^-1 \\\\\n", + "\t4 & CO2 & US & 18089 & 0020100201 & -87.4775 & 41.6739 & 41.6498 kg s\\^-1 \\\\\n", + "\t5 & CO2 & US & 48167 & 0020200203 & -94.9332 & 29.3777 & 36.5314 kg s\\^-1 \\\\\n", + "\t6 & CO2 & US & 47075 & 0020100201 & -89.3925 & 35.6499 & 35.107 kg s\\^-1 \\\\\n", + "\t7 & CO2 & US & 48167 & 0020200203 & -94.9322 & 29.3777 & 34.9882 kg s\\^-1 \\\\\n", + "\t8 & CO2 & US & 48167 & 0020200203 & -94.9327 & 29.3777 & 29.4392 kg s\\^-1 \\\\\n", + "\t9 & CO2 & US & 33007 & 0010100903 & -71.1752 & 44.4712 & 25.6217 kg s\\^-1 \\\\\n", + "\t10 & CO2 & US & 48203 & 0020200203 & -94.6902 & 32.448 & 21.6134 kg s\\^-1 \\\\\n", + "\t11 & CO2 & US & 48203 & 0020200203 & -94.6902 & 32.4477 & 21.4005 kg s\\^-1 \\\\\n", + "\t12 & CO2 & US & 39035 & 0020100201 & -81.7142 & 41.4738 & 20.8558 kg s\\^-1 \\\\\n", + "\t13 & CO2 & US & 06077 & 0010100902 & -121.33 & 37.9438 & 17.2726 kg s\\^-1 \\\\\n", + "\t14 & CO2 & US & 13095 & 0010100912 & -84.1076 & 31.554 & 15.9262 kg s\\^-1 \\\\\n", + "\t15 & CO2 & US & 51670 & 0010100204 & -77.2835 & 37.2974 & 13.899 kg s\\^-1 \\\\\n", + "\t16 & CO2 & US & 51031 & 0010100204 & -79.2736 & 37.119 & 11.7696 kg s\\^-1 \\\\\n", + "\t17 & CO2 & US & 50007 & 0010100902 & -73.2088 & 44.4933 & 11.6236 kg s\\^-1 \\\\\n", + "\t18 & CO2 & US & 12095 & 0020100201 & -81.1692 & 28.4881 & 10.6814 kg s\\^-1 \\\\\n", + "\t19 & CO2 & US & 23007 & 0010100902 & -70.4249 & 45.1414 & 10.2792 kg s\\^-1 \\\\\n", + "\t20 & CO2 & US & 23001 & 0010100902 & -70.162 & 44.431 & 9.97696 kg s\\^-1 \\\\\n", + "\t21 & CO2 & US & 12095 & 0020100201 & -81.1675 & 28.4881 & 9.22016 kg s\\^-1 \\\\\n", + "\t22 & CO2 & US & 53027 & 0010100902 & -123.774 & 46.9695 & 9.05911 kg s\\^-1 \\\\\n", + "\t23 & CO2 & US & 47075 & 0020100201 & -89.3964 & 35.6578 & 8.77253 kg s\\^-1 \\\\\n", + "\t24 & CO2 & US & 26039 & 0010100902 & -84.6903 & 44.6045 & 8.15966 kg s\\^-1 \\\\\n", + "\t25 & CO2 & US & 33003 & 0010100903 & -71.1969 & 43.8358 & 7.81778 kg s\\^-1 \\\\\n", + "\t26 & CO2 & US & 37155 & 0010100911 & -78.9963 & 34.59 & 7.77879 kg s\\^-1 \\\\\n", + "\t27 & CO2 & US & 27139 & 0010101209 & -93.5387 & 44.795 & 7.35276 kg s\\^-1 \\\\\n", + "\t28 & CO2 & US & 45005 & 0010200902 & -81.2837 & 32.9999 & 7.34286 kg s\\^-1 \\\\\n", + "\t29 & CO2 & US & 45035 & 0010200902 & -80.4547 & 33.2401 & 7.11727 kg s\\^-1 \\\\\n", + "\t30 & CO2 & US & 51175 & 0010200219 & -76.9968 & 36.6506 & 6.4692 kg s\\^-1 \\\\\n", + "\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ \\\\\n", + "\\end{tabular}\n" + ], + "text/plain": [ + "\u001b[1m119725×7 DataFrame\u001b[0m\n", + "\u001b[1m Row \u001b[0m│\u001b[1m POLID \u001b[0m\u001b[1m COUNTRY \u001b[0m\u001b[1m FIPS \u001b[0m\u001b[1m SCC \u001b[0m\u001b[1m LONGITUDE \u001b[0m\u001b[1m LATITUDE \u001b[0m\u001b[1m ANN_VALU\u001b[0m ⋯\n", + " │\u001b[90m String15 \u001b[0m\u001b[90m String3 \u001b[0m\u001b[90m String \u001b[0m\u001b[90m String \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\u001b[90m Quantity\u001b[0m ⋯\n", + "────────┼───────────────────────────────────────────────────────────────────────\n", + " 1 │ CO2 US 18173 0010100202 -87.3329 37.9147 95.6968 ⋯\n", + " 2 │ CO2 US 18173 0010100203 -87.3333 37.9145 51.9336\n", + " 3 │ CO2 US 06029 0020100201 -119.47 35.2802 43.1176\n", + " 4 │ CO2 US 18089 0020100201 -87.4775 41.6739 41.6498\n", + " 5 │ CO2 US 48167 0020200203 -94.9332 29.3777 36.5314 ⋯\n", + " 6 │ CO2 US 47075 0020100201 -89.3925 35.6499 35.107\n", + " 7 │ CO2 US 48167 0020200203 -94.9322 29.3777 34.9882\n", + " 8 │ CO2 US 48167 0020200203 -94.9327 29.3777 29.4392\n", + " 9 │ CO2 US 33007 0010100903 -71.1752 44.4712 25.6217 ⋯\n", + " 10 │ CO2 US 48203 0020200203 -94.6902 32.448 21.6134\n", + " 11 │ CO2 US 48203 0020200203 -94.6902 32.4477 21.4005\n", + " ⋮ │ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋱\n", + " 119716 │ 120127 US 36005 0020200102 -73.9091 40.7987 0.0\n", + " 119717 │ 191242 US 36005 0020200102 -73.9091 40.7987 0.0 ⋯\n", + " 119718 │ 193395 US 36005 0020200102 -73.9091 40.7987 0.0\n", + " 119719 │ 205992 US 36005 0020200102 -73.9091 40.7987 0.0\n", + " 119720 │ 218019 US 36005 0020200102 -73.9091 40.7987 0.0\n", + " 119721 │ 50328 US 36005 0020200102 -73.9091 40.7987 0.0 ⋯\n", + " 119722 │ 53703 US 36005 0020200102 -73.9091 40.7987 0.0\n", + " 119723 │ 56553 US 36005 0020200102 -73.9091 40.7987 0.0\n", + " 119724 │ 7439976 US 36005 0020200102 -73.9091 40.7987 0.0\n", + " 119725 │ 83329 US 36005 0020200102 -73.9091 40.7987 0.0 ⋯\n", + "\u001b[36m 1 column and 119704 rows omitted\u001b[0m" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "emis_grouped = sort(combine(groupby(emis, [:POLID, :COUNTRY, :FIPS, :SCC, :LONGITUDE, :LATITUDE]), :ANN_VALUE=>sum=>:ANN_VALUE), :ANN_VALUE, rev=true)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we filter the grouped emissions records to include only the rows where POLID is a key in the Pollutants dictionary, and we group the emissions records by their SCC." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "filtered_emis_grouped = filter(row -> row.POLID in keys(Pollutants), emis_grouped)\n", + "for row in eachrow(filtered_emis_grouped)\n", + " row.POLID = Pollutants[row.POLID]\n", + "end\n", + "\n", + "records = Dict{String, Vector{DataFrameRow}}()\n", + "\n", + "for row in eachrow(filtered_emis_grouped)\n", + " scc = row.SCC\n", + " if haskey(records, scc)\n", + " push!(records[scc], row)\n", + " else\n", + " records[scc] = [row]\n", + " end\n", + "end\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we will perform spatial and temporal processing on these emissions and save the results to a shapefile." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "440" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This is an example for ptegu. We will update it to loop over all sectors.\n", + "\n", + "c = Config([\n", + "\"C:/Users/hemamipo/OneDrive - University of Illinois - Urbana/Research-UIUC/Tessum Group/Tasks/NEI_Processor/2019nei/ge_dat/gridding/agref_us_2017platform_13jan2022_nf_v12.txt\",\n", + "\"C:/Users/hemamipo/OneDrive - University of Illinois - Urbana/Research-UIUC/Tessum Group/Tasks/NEI_Processor/2019nei/ge_dat/gridding/amgref_can2015_mex2010_for2016beta_09mar2021_nf_v5.txt\",\n", + "\"C:/Users/hemamipo/OneDrive - University of Illinois - Urbana/Research-UIUC/Tessum Group/Tasks/NEI_Processor/2019nei/ge_dat/gridding/mgref_onroad_MOVES3_23sep2021_v1.txt\"\n", + "],\n", + "\"C:\\\\Users\\\\hemamipo\\\\OneDrive - University of Illinois - Urbana\\\\Research-UIUC\\\\Tessum Group\\\\Tasks\\\\NEI_Processor\\\\Scripts\\\\surrogate_specification_2019.csv\",\n", + "\"C:\\\\Users\\\\hemamipo\\\\Downloads\\\\tmp2\\\\NEI_Processor\\\\nei2019\\\\Shapefiles\",\n", + "\"+proj=longlat\",\n", + "\"+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40 +lon_0=-97 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +to_meter=1\",\n", + "\"C:\\\\Users\\\\hemamipo\\\\OneDrive - University of Illinois - Urbana\\\\Research-UIUC\\\\Tessum Group\\\\Tasks\\\\NEI_Processor\\\\gridCells.txt\",\n", + "\"InMAP\",\n", + "\"C:/Users/hemamipo/OneDrive - University of Illinois - Urbana/Research-UIUC/Tessum Group/Tasks/NEI_Processor/2019nei/cb_2019_us_county_500k/cb_2019_us_county_500k.shp\",\n", + "\"C:/Users/hemamipo/OneDrive - University of Illinois - Urbana/Research-UIUC/Tessum Group/Tasks/NEI_Processor/Emission Shapefiles/2019\"\n", + ")\n", + "\n", + "InMAP_crs = \"\"\"\n", + "PROJCS[\\\"Lambert_Conformal_Conic\\\",GEOGCS[\\\"GCS_unnamed ellipse\\\",DATUM[\\\"D_unknown\\\",SPHEROID[\\\"locIndexUnknown\\\",6370997.000000,0]],PRIMEM[\\\"Greenwich\\\",0],UNIT[\\\"Degree\\\",0.017453292519943295]],PROJECTION[\\\"Lambert_Conformal_Conic\\\"],PARAMETER[\\\"standard_parallel_1\\\",33],PARAMETER[\\\"standard_parallel_2\\\",45],PARAMETER[\\\"latitude_of_origin\\\",40],PARAMETER[\\\"central_meridian\\\",-97],PARAMETER[\\\"false_easting\\\",0],PARAMETER[\\\"false_northing\\\",0],UNIT[\\\"Meter\\\",1]]\n", + "\"\"\"\n", + "\n", + "sp, gd = setupSpatialProcessor(c)\n", + "\n", + "uCoordinates = uniqueCoordinates(records)\n", + "gdCRS = \"+proj=lcc +lat_1=33 +lat_2=45 +lat_0=40 +lon_0=-97 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +to_meter=1\"\n", + "emisCRS = \"EPSG:4326\" \n", + "\n", + "transformer = Proj4.Transformation(emisCRS, gdCRS)\n", + "\n", + "uCoordinates = uniqueCoordinates(records)\n", + "\n", + "locIndex = Dict{Union{Tuple{Float64, Float64}, String}, Union{IndexInfo, Nothing}}()\n", + "\n", + "for (longitude, latitude) in uCoordinates\n", + " x, y = transformer([latitude, longitude])\n", + " idx = GetIndex(gd, gdCRS, \"point\",x=x, y=y)\n", + " locIndex[(longitude, latitude)] = idx\n", + "end\n", + "\n", + "writeEmis(c.EmisShp * \"/ptegu.shp\", gd, records, locIndex, InMAP_crs, \"point\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our next steps will be to perform more spatial and temporal processing on these emissions. To do this, we will pull in the EPA's spatial surrogate shapefiles to allocate county-level emissions to locations within each county, and we will leverage the daily and hourly emission files above plus temporal surrogate information to allocate the emissions to times during the year. Finally, we will leverage emissions factor and meteorology information to convert the mobile and fire activity data above into emissions.\r\n", + "\r\n", + "Stay tuned!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.2", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}