Databeta #956

Merged
merged 6 commits on Mar 26, 2021
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -35,7 +35,7 @@ repos:

# Make sure import statements are sorted uniformly.
- repo: https://github.com/pre-commit/mirrors-isort
rev: v5.7.0
rev: v5.8.0
hooks:
- id: isort

3 changes: 2 additions & 1 deletion devtools/datasette/metadata.yml
@@ -29,7 +29,8 @@ databases:
description_html: |
<p>This database contains cleaned and normalized data from...
<ul>
<li><a href="https://www.eia.gov/electricity/data/eia860/">EIA Form 860</a> (2009-2019)</li>
<li><a href="https://www.eia.gov/electricity/data/eia860/">EIA Form 860</a> (2004-2019)</li>
<li><a href="https://www.eia.gov/electricity/data/eia860m/">EIA Form 860m</a> (year-to-date for 2020-11)</li>
<li><a href="https://www.eia.gov/electricity/data/eia923/">EIA Form 923</a> (2009-2019)</li>
<li><a href="https://www.ferc.gov/industries-data/electric/general-information/electric-industry-forms/form-1-electric-utility-annual">FERC Form 1</a> (1994-2019)</li>
</ul></p>
15 changes: 9 additions & 6 deletions devtools/datasette/publish.sh
@@ -14,9 +14,12 @@ sqlite-utils index-foreign-keys $SQLITE_DIR/pudl.sqlite
# Full column and table level descriptions / metadata

datasette publish cloudrun \
--service=catalyst-datasette \
--memory=3Gi \
--install=datasette-cluster-map \
--install=datasette-vega \
--install=datasette-block-robots \
-m metadata.yml $SQLITE_DIR/ferc1.sqlite $SQLITE_DIR/pudl.sqlite
--service catalyst-datasette \
--memory 3Gi \
--install datasette-cluster-map \
--install datasette-vega \
--install datasette-block-robots \
--metadata metadata.yml \
--extra-options="--setting sql_time_limit_ms 5000" \
$SQLITE_DIR/pudl.sqlite \
$SQLITE_DIR/ferc1.sqlite
6 changes: 0 additions & 6 deletions docs/data_sources.rst
@@ -18,9 +18,3 @@ Data Sources
:maxdepth: 1

data_sources/wip_future

.. toctree::
:caption: Metadata
:maxdepth: 1

data_sources/metadata
1 change: 1 addition & 0 deletions docs/index.rst
@@ -28,3 +28,4 @@ The Public Utility Data Liberation Project
License <LICENSE>
code_of_conduct
Module Index <api/modules>
pudl_db_index
2 changes: 1 addition & 1 deletion docs/data_sources/metadata.rst → docs/pudl_db_index.rst
@@ -1,5 +1,5 @@
===============================================================================
All PUDL Database Tables
PUDL Database Table Index
===============================================================================

.. _accumulated_depreciation_ferc1:
4 changes: 3 additions & 1 deletion src/pudl/analysis/service_territory.py
@@ -459,8 +459,10 @@ def parse_command_line(argv):

def main():
"""Compile historical utility and balancing area territories."""
# Display logged output from the PUDL package:
pudl_logger = logging.getLogger("pudl")
log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
coloredlogs.install(fmt=log_format, level='INFO', logger=logger)
coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

args = parse_command_line(sys.argv)
pudl_settings = pudl.workspace.setup.get_defaults()
6 changes: 3 additions & 3 deletions src/pudl/cli.py
@@ -77,15 +77,15 @@ def parse_command_line(argv):
def main():
"""Parse command line and initialize PUDL DB."""
# Display logged output from the PUDL package:
logger = logging.getLogger(pudl.__name__)
pudl_logger = logging.getLogger("pudl")
log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
coloredlogs.install(fmt=log_format, level='INFO', logger=logger)
coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

args = parse_command_line(sys.argv)
if args.logfile:
file_logger = logging.FileHandler(args.logfile)
file_logger.setFormatter(logging.Formatter(log_format))
logger.addHandler(file_logger)
pudl_logger.addHandler(file_logger)
with pathlib.Path(args.settings_file).open() as f:
script_settings = yaml.safe_load(f)

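Context for the repeated logging change in this and the other scripts: instead of configuring a module-specific logger, each entry point now installs the colorized handler on the package-level "pudl" logger, so records emitted anywhere under the pudl package propagate to one place. A minimal standalone sketch of the pattern (the child logger name below is hypothetical, used only for illustration):

    import logging

    import coloredlogs

    # Configure the package-level logger once, in the script entry point.
    pudl_logger = logging.getLogger("pudl")
    log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
    coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

    # Any module under the package gets its own child logger...
    module_logger = logging.getLogger("pudl.extract.eia860")  # hypothetical name
    # ...and its records propagate up to the handler attached to "pudl".
    module_logger.info("Displayed by the package-level colored handler.")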
3 changes: 2 additions & 1 deletion src/pudl/convert/censusdp1tract_to_sqlite.py
@@ -101,8 +101,9 @@ def parse_command_line(argv):

def main():
"""Convert the Census DP1 GeoDatabase into an SQLite Database."""
pudl_logger = logging.getLogger("pudl")
log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
coloredlogs.install(fmt=log_format, level='INFO', logger=logger)
coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

# Currently have no arguments, but want to generate a usage message.
_ = parse_command_line(sys.argv)
5 changes: 5 additions & 0 deletions src/pudl/convert/datapkg_to_rst.py
@@ -15,6 +15,7 @@
import logging
import sys

import coloredlogs
from jinja2 import BaseLoader, Environment

logger = logging.getLogger(__name__)
@@ -122,6 +123,10 @@ def parse_command_line(argv):

def main():
"""Run conversion from json to rst."""
pudl_logger = logging.getLogger("pudl")
log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

args = parse_command_line(sys.argv)
datapkg2rst(args.input, args.output)

4 changes: 2 additions & 2 deletions src/pudl/convert/datapkg_to_sqlite.py
@@ -142,9 +142,9 @@ def parse_command_line(argv):
def main():
"""Merge PUDL datapackages and save them into an SQLite database."""
# Display logged output from the PUDL package:
logger = logging.getLogger(pudl.__name__)
pudl_logger = logging.getLogger("pudl")
log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
coloredlogs.install(fmt=log_format, level='INFO', logger=logger)
coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

args = parse_command_line(sys.argv)
pudl_settings = pudl.workspace.setup.get_defaults()
4 changes: 2 additions & 2 deletions src/pudl/convert/epacems_to_parquet.py
@@ -301,9 +301,9 @@ def parse_command_line(argv):
def main():
"""Convert zipped EPA CEMS Hourly data to Apache Parquet format."""
# Display logged output from the PUDL package:
logger = logging.getLogger(pudl.__name__)
pudl_logger = logging.getLogger("pudl")
log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
coloredlogs.install(fmt=log_format, level='INFO', logger=logger)
coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

args = parse_command_line(sys.argv)

3 changes: 2 additions & 1 deletion src/pudl/convert/ferc1_to_sqlite.py
@@ -64,8 +64,9 @@ def parse_command_line(argv):
def main(): # noqa: C901
"""Clone the FERC Form 1 FoxPro database into SQLite."""
# Display logged output from the PUDL package:
pudl_logger = logging.getLogger("pudl")
log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
coloredlogs.install(fmt=log_format, level='INFO', logger=logger)
coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

args = parse_command_line(sys.argv)
with pathlib.Path(args.settings_file).open() as f:
2 changes: 1 addition & 1 deletion src/pudl/transform/ferc1.py
@@ -1056,7 +1056,7 @@ def fuel(ferc1_raw_dfs, ferc1_transformed_dfs):
# Drop any records that are missing data. This is a blunt instrument, to
# be sure. In some cases we lose data here, because some utilities have
# (for example) a "Total" line w/ only fuel_mmbtu_per_kwh on it. Grr.
# fuel_ferc1_df.dropna(inplace=True)
fuel_ferc1_df.dropna(inplace=True)

ferc1_transformed_dfs['fuel_ferc1'] = fuel_ferc1_df

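The re-enabled dropna() call drops every row with a missing value in any column, which is why the neighboring comment calls it a blunt instrument: a utility's "Total" line carrying only fuel_mmbtu_per_kwh is discarded along with genuinely empty records. A small illustrative sketch of that behavior (hypothetical column names and values, not actual PUDL data):

    import numpy as np
    import pandas as pd

    # A toy frame standing in for fuel_ferc1_df: the "Total" row has data in
    # only one column, so dropna() removes it along with the fully empty row.
    fuel_df = pd.DataFrame({
        "plant_name": ["Plant A", "Total", None],
        "fuel_qty_burned": [1000.0, np.nan, np.nan],
        "fuel_mmbtu_per_kwh": [10.5, 42.0, np.nan],
    })
    fuel_df.dropna(inplace=True)
    print(fuel_df)  # only the fully populated "Plant A" row survives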
4 changes: 2 additions & 2 deletions src/pudl/workspace/datastore.py
@@ -516,9 +516,9 @@ def main():
"""Cache datasets."""
args = parse_command_line()

# logger = logging.getLogger(pudl.__name__)
pudl_logger = logging.getLogger("pudl")
log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
coloredlogs.install(fmt=log_format, level='INFO', logger=logger)
coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

logger.setLevel(args.loglevel)

6 changes: 4 additions & 2 deletions src/pudl/workspace/setup_cli.py
@@ -57,6 +57,8 @@

import pudl

logger = logging.getLogger(__name__)


def initialize_parser():
"""Parse command line arguments for the pudl_setup script."""
@@ -105,9 +107,9 @@ def initialize_parser():
def main():
"""Set up a new default PUDL workspace."""
# Display logged output from the PUDL package:
logger = logging.getLogger(pudl.__name__)
pudl_logger = logging.getLogger("pudl")
log_format = '%(asctime)s [%(levelname)8s] %(name)s:%(lineno)s %(message)s'
coloredlogs.install(fmt=log_format, level='INFO', logger=logger)
coloredlogs.install(fmt=log_format, level='INFO', logger=pudl_logger)

parser = initialize_parser()
args = parser.parse_args(sys.argv[1:])
59 changes: 49 additions & 10 deletions test/integration/fast_output_test.py
@@ -1,6 +1,9 @@
"""PyTest cases related to the integration between FERC1 & EIA 860/923."""
import logging
import os
import sys

import geopandas as gpd
import pytest

import pudl
@@ -80,14 +83,50 @@ def test_ferc714_etl(fast_out):
fast_out.etl_ferc714()


def test_ferc714_respondents(fast_out, pudl_settings_fixture):
"""Test the FERC 714 Respondent & Service Territory outputs."""
ferc714_out = pudl.output.ferc714.Respondents(
fast_out,
pudl_settings=pudl_settings_fixture,
@pytest.fixture(scope="module")
def ferc714_out(fast_out, pudl_settings_fixture):
"""A FERC 714 Respondents output object for use in CI."""
return pudl.output.ferc714.Respondents(
fast_out, pudl_settings=pudl_settings_fixture,
)
_ = ferc714_out.annualize()
_ = ferc714_out.categorize()
_ = ferc714_out.summarize_demand()
_ = ferc714_out.fipsify()
_ = ferc714_out.georef_counties()


def test_ferc714_respondents_annualize(ferc714_out):
"""Test annualized FERC 714 respondent outputs."""
assert len(ferc714_out.annualize()) > 0


def test_ferc714_respondents_categorize(ferc714_out):
"""Test categorized FERC 714 respondent outputs."""
assert len(ferc714_out.categorize()) > 0


def test_ferc714_respondents_summarized(ferc714_out):
"""Test summarized FERC 714 demand outputs."""
assert len(ferc714_out.summarize_demand()) > 0


def test_ferc714_respondents_fipsified(ferc714_out):
"""Test FERC 714 respondent county FIPS associations."""
assert len(ferc714_out.fipsify()) > 0


@pytest.mark.xfail(
(sys.platform != "linux")
& (not os.environ.get("CONDA_PREFIX", False)),
reason="Test relies on ogr2ogr being installed via GDAL."
)
def test_ferc714_respondents_georef_counties(ferc714_out):
"""
Test FERC 714 respondent county geometries.

This test works with the Census DP1 data, which is converted into
SQLite using the GDAL command line tool ogr2ogr. That tool is easy
to install via conda or on Linux, but is more challenging on Windows
and MacOS, so this test is conditionally marked xfail when the user
is neither on Linux nor using conda.

"""
ferc714_gdf = ferc714_out.georef_counties()
assert len(ferc714_gdf) > 0
assert isinstance(ferc714_gdf, gpd.GeoDataFrame)
5 changes: 3 additions & 2 deletions tox.ini
@@ -9,6 +9,7 @@ whitelist_externals =
envdir = {toxinidir}/.env_pudl
passenv =
CI
CONDA_PREFIX
HOME
GITHUB_*
API_KEY_EIA
@@ -92,10 +93,10 @@ extras =
doc
commands =
bash -c 'rm -rf docs/_build docs/api/*.rst'
bash -c 'rm -rf docs/data_sources/metadata.rst'
bash -c 'rm -rf docs/pudl_db_index.rst'
datapkg_to_rst \
-i src/pudl/package_data/meta/datapkg/datapackage.json \
-o docs/data_sources/metadata.rst
-o docs/pudl_db_index.rst
sphinx-apidoc \
--separate \
--output-dir docs/api src/pudl \