Skip to content

Commit

Permalink
Move wurst extraction functions to bw2data
Browse files Browse the repository at this point in the history
  • Loading branch information
cmutel committed Aug 17, 2022
1 parent 00370df commit 55f1071
Show file tree
Hide file tree
Showing 9 changed files with 388 additions and 9 deletions.
8 changes: 4 additions & 4 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
- bash: |
source activate bw2
mamba install --yes --quiet -c defaults -c conda-forge -c cmutel --name bw2 bw_processing python=$PYTHON_VERSION peewee wurst brightway25 pytest pytest-azurepipelines">=1.0" pytest-cov pip
mamba install --yes --quiet -c defaults -c conda-forge -c cmutel --name bw2 bw_processing python=$PYTHON_VERSION peewee tqdm brightway25 pytest pytest-azurepipelines">=1.0" pytest-cov pip
displayName: Install Anaconda packages
- bash: |
Expand Down Expand Up @@ -90,7 +90,7 @@ jobs:
- bash: |
source activate bw2
mamba install --yes -c defaults -c conda-forge -c cmutel --name bw2 python=$PYTHON_VERSION bw_processing pytest peewee wurst brightway25 pytest-azurepipelines">=1.0" pip
mamba install --yes -c defaults -c conda-forge -c cmutel --name bw2 python=$PYTHON_VERSION bw_processing pytest peewee tqdm brightway25 pytest-azurepipelines">=1.0" pip
displayName: Install Anaconda packages
- bash: |
Expand Down Expand Up @@ -144,7 +144,7 @@ jobs:
- bash: |
source activate bw2
mamba install --yes -c defaults -c conda-forge -c cmutel --name bw2 python=$PYTHON_VERSION bw_processing pytest peewee wurst brightway25 pytest-azurepipelines">=1.0" pip
mamba install --yes -c defaults -c conda-forge -c cmutel --name bw2 python=$PYTHON_VERSION bw_processing pytest peewee tqdm brightway25 pytest-azurepipelines">=1.0" pip
displayName: Install Anaconda packages
- bash: |
Expand Down Expand Up @@ -198,7 +198,7 @@ jobs:
- script: |
call activate bw2
conda install --yes -c defaults -c conda-forge -c cmutel -c haasad --name bw2 python=%PYTHON_VERSION% bw_processing pytest peewee wurst brightway25 pytest-azurepipelines">=1.0" pywin32 pip
conda install --yes -c defaults -c conda-forge -c cmutel -c haasad --name bw2 python=%PYTHON_VERSION% bw_processing pytest peewee tqdm brightway25 pytest-azurepipelines">=1.0" pywin32 pip
displayName: Install Anaconda packages
- script: |
Expand Down
2 changes: 2 additions & 0 deletions bw2data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"databases",
"DataStore",
"Edge",
"extract_brightway_databases",
"get_activity",
"get_node",
"get_id",
Expand Down Expand Up @@ -72,6 +73,7 @@
from .weighting_normalization import Weighting, Normalization
from .backends import convert_backend, get_id, Node, Edge
from .compat import prepare_lca_inputs, Mapping
from .backends.wurst_extraction import extract_brightway_databases

mapping = Mapping()

Expand Down
7 changes: 2 additions & 5 deletions bw2data/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,14 +947,11 @@ def edges_to_dataframe(self, categorical: bool = True, formatters: Optional[List
Returns a pandas ``DataFrame``.
"""
try:
from wurst import extract_brightway2_databases
except ImportError:
raise ImportError("This method requires the `wurst` library.")
from .wurst_extraction import extract_brightway_databases

result = []

for target in extract_brightway2_databases(self.name, add_identifiers=True):
for target in extract_brightway_databases(self.name, add_identifiers=True):
for edge in target["exchanges"]:
row = {
"target_id": target["id"],
Expand Down
171 changes: 171 additions & 0 deletions bw2data/backends/wurst_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
from tqdm import tqdm
import copy

from ..database import DatabaseChooser
from . import SQLiteBackend, ActivityDataset, ExchangeDataset


def _list_or_dict(obj):
    """Yield entries from ``obj``, which is either a ``dict`` or another iterable.

    For a ``dict``, every value is deep-copied, the copy receives its key
    under ``"name"``, and the copy is yielded, so the input mapping is left
    untouched. For any other iterable the elements are yielded as-is
    (no copy is made).
    """
    if not isinstance(obj, dict):
        yield from obj
        return

    for label, payload in obj.items():
        entry = copy.deepcopy(payload)
        entry["name"] = label
        yield entry


def extract_activity(proxy, add_identifiers=False):
    """Build a Wurst-format dataset ``dict`` from an ``ActivityDataset``.

    ``location``, ``database``, ``code``, ``name`` and the reference product
    are read from attributes of ``proxy``; ``classifications``, ``comment``,
    ``unit`` and ``parameters`` are read from ``proxy.data`` with empty
    defaults. ``"exchanges"`` always starts as an empty list.

    ``"parameters"`` maps each parameter name to its ``amount``, while
    ``"parameters full"`` keeps the complete parameter entries.

    If ``add_identifiers`` is true, ``proxy.id`` is stored under ``"id"``.

    Raises ``AssertionError`` if ``proxy`` is not an ``ActivityDataset``.
    """
    assert isinstance(proxy, ActivityDataset)

    attributes = proxy.data
    raw_parameters = attributes.get("parameters", [])

    dataset = {
        "classifications": attributes.get("classifications", []),
        "comment": attributes.get("comment", ""),
        "location": proxy.location,
        "database": proxy.database,
        "code": proxy.code,
        "name": proxy.name,
        "reference product": proxy.product,
        "unit": attributes.get("unit", ""),
        "exchanges": [],
        "parameters": {
            param["name"]: param["amount"]
            for param in _list_or_dict(raw_parameters)
        },
        "parameters full": list(_list_or_dict(raw_parameters)),
    }
    if add_identifiers:
        dataset["id"] = proxy.id
    return dataset


def extract_exchange(proxy, add_properties=False):
    """Build a Wurst-format exchange ``dict`` from an ``ExchangeDataset``.

    The uncertainty-related fields present in ``proxy.data`` are copied over.
    When no ``"uncertainty type"`` is stored, it is set to ``0`` and ``"loc"``
    is set to the amount. ``"type"``, ``"production volume"``, ``"input"``
    and ``"output"`` (both ``(database, code)`` tuples) are always added.

    If ``add_properties`` is true, ``"properties"`` is included (an empty
    ``dict`` when the exchange has none).

    Raises ``AssertionError`` if ``proxy`` is not an ``ExchangeDataset`` or
    if the exchange data has no ``amount``.
    """
    assert isinstance(proxy, ExchangeDataset)

    payload = proxy.data
    data = {}
    for field in (
        "uncertainty type",
        "loc",
        "scale",
        "shape",
        "minimum",
        "maximum",
        "amount",
        "pedigree",
    ):
        if field in payload:
            data[field] = payload[field]

    assert "amount" in data, "Exchange has no `amount` field"

    if "uncertainty type" not in data:
        data.update({"uncertainty type": 0, "loc": data["amount"]})

    data["type"] = proxy.type
    data["production volume"] = proxy.data.get("production volume")
    data["input"] = (proxy.input_database, proxy.input_code)
    data["output"] = (proxy.output_database, proxy.output_code)
    if add_properties:
        data["properties"] = proxy.data.get("properties", {})
    return data


def add_exchanges_to_consumers(activities, exchange_qs, add_properties=False, add_identifiers=False):
    """Retrieve exchanges from the database and attach them to ``activities``.

    Each row of ``exchange_qs`` is converted with ``extract_exchange``; its
    ``"output"`` key is removed and used to find the consuming dataset, and
    the exchange is appended to that dataset's ``"exchanges"`` list.

    Assumes that activities are single output, and that the exchange code is
    the same as the activity code. This assumption is valid for ecoinvent 3.3
    cutoff imported into Brightway2.

    Args:
        activities: Dataset ``dict`` objects with ``"database"``, ``"code"``
            and ``"exchanges"`` keys. Modified in place.
        exchange_qs: Query over exchange rows; must support ``.count()`` and
            iteration.
        add_properties: Passed through to ``extract_exchange``.
        add_identifiers: Accepted for signature compatibility; not used here.

    Returns:
        The same ``activities`` object that was passed in.

    Raises:
        KeyError: If an exchange's output is not one of ``activities``.
    """
    lookup = {(o["database"], o["code"]): o for o in activities}

    with tqdm(total=exchange_qs.count()) as pbar:
        for row in exchange_qs:
            exc = extract_exchange(row, add_properties=add_properties)
            # The consumer is identified by the exchange's output key; it is
            # dropped from the exchange itself once the owner is known.
            consumer = tuple(exc.pop("output"))
            lookup[consumer]["exchanges"].append(exc)
            pbar.update(1)
    return activities


def add_input_info_for_indigenous_exchanges(activities, names, add_identifiers=False):
    """Fill in supplier details for exchanges whose input is in ``activities``.

    An exchange is handled only if it has an ``"input"`` key whose database
    (first element) is one of ``names``. Such exchanges get ``product``,
    ``name``, ``unit``, ``location`` and ``database`` copied from the supplying
    dataset, plus ``id`` and ``code`` when ``add_identifiers`` is true and
    ``categories`` for biosphere exchanges. Their ``"input"`` key is then
    removed. All other exchanges are left untouched.

    ``activities`` is modified in place; nothing is returned.

    Raises ``KeyError`` if a handled exchange points to a ``(database, code)``
    pair that is not among ``activities``.
    """
    own_databases = set(names)
    by_key = {(ds["database"], ds["code"]): ds for ds in activities}

    for dataset in activities:
        for exc in dataset["exchanges"]:
            if "input" in exc and exc["input"][0] in own_databases:
                supplier = by_key[exc["input"]]
                exc.update(
                    {
                        "product": supplier.get("reference product"),
                        "name": supplier.get("name"),
                        "unit": supplier.get("unit"),
                        "location": supplier.get("location"),
                        "database": supplier.get("database"),
                    }
                )
                if add_identifiers:
                    exc["id"] = supplier['id']
                    exc['code'] = supplier['code']
                if exc["type"] == "biosphere":
                    # NOTE(review): datasets built by ``extract_activity`` in
                    # this module carry no "categories" key, so this is None
                    # for them - confirm whether that is intended.
                    exc["categories"] = supplier.get("categories")
                del exc["input"]


def add_input_info_for_external_exchanges(activities, names, add_identifiers=False):
    """Fill in supplier details for exchanges whose input is in another database.

    An exchange is handled only if it has an ``"input"`` key whose database
    (first element) is *not* one of ``names``. The supplier is fetched with
    ``ActivityDataset.get`` and remembered per ``(database, code)`` key so
    each one is looked up only once. The exchange gets ``name``, ``product``,
    ``unit``, ``location`` and ``database``, plus ``id`` and ``code`` when
    ``add_identifiers`` is true and ``categories`` for biosphere exchanges.
    The ``"input"`` key is kept.

    ``activities`` is modified in place; nothing is returned.
    """
    own_databases = set(names)
    fetched = {}

    for dataset in tqdm(activities):
        for exc in dataset["exchanges"]:
            if "input" not in exc:
                continue
            key = exc["input"]
            if key[0] in own_databases:
                continue

            if key not in fetched:
                fetched[key] = ActivityDataset.get(
                    ActivityDataset.database == key[0],
                    ActivityDataset.code == key[1],
                )
            node = fetched[key]

            exc.update(
                {
                    "name": node.name,
                    "product": node.product,
                    "unit": node.data.get("unit"),
                    "location": node.location,
                    "database": node.database,
                }
            )
            if add_identifiers:
                exc["id"] = node.id
                exc['code'] = node.code
            if exc["type"] == "biosphere":
                exc["categories"] = node.data.get("categories")


def extract_brightway_databases(database_names, add_properties=False, add_identifiers=False):
    """Extract Brightway2 ``SQLiteBackend`` databases to the Wurst internal format.

    ``database_names`` is a list, tuple or set of database names; a single
    name given as a string is wrapped in a list. You should already be in the
    correct project.

    ``add_properties`` adds a ``"properties"`` entry to every exchange, and
    ``add_identifiers`` adds ``"id"`` to datasets and ``"id"``/``"code"`` to
    exchanges.

    Returns a list of dataset documents.

    Raises ``AssertionError`` if ``database_names`` has another type or if
    any named database is not a ``SQLiteBackend``.
    """
    if isinstance(database_names, str):
        database_names = [database_names]
    assert isinstance(
        database_names, (list, tuple, set)
    ), "Must pass list of database names"

    backends = [DatabaseChooser(name) for name in database_names]
    assert all(
        isinstance(backend, SQLiteBackend) for backend in backends
    ), "Wrong type of database object (must be SQLiteBackend)"

    # Build the activity and exchange queries up front; exchanges are
    # selected by the database of their consuming (output) activity.
    activity_qs = ActivityDataset.select().where(
        ActivityDataset.database << database_names
    )
    exchange_qs = ExchangeDataset.select().where(
        ExchangeDataset.output_database << database_names
    )

    # Step 1: one dataset document per activity row
    print("Getting activity data")
    activities = []
    for row in tqdm(activity_qs):
        activities.append(extract_activity(row, add_identifiers=add_identifiers))

    # Step 2: attach every exchange to the activity that consumes it
    print("Adding exchange data to activities")
    add_exchanges_to_consumers(activities, exchange_qs, add_properties=add_properties)

    # Step 3: describe exchange inputs, first from the extracted databases,
    # then from any other database
    print("Filling out exchange data")
    add_input_info_for_indigenous_exchanges(
        activities, database_names, add_identifiers=add_identifiers
    )
    add_input_info_for_external_exchanges(
        activities, database_names, add_identifiers=add_identifiers
    )
    return activities
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ python-coveralls
requests>=1.1.0
scipy
stats_arrays
tqdm
voluptuous
whoosh
wrapt
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pyprind
requests>=1.1.0
scipy
stats_arrays
tqdm
voluptuous
whoosh
wrapt
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"requests>=1.1.0",
"scipy",
"stats_arrays",
"tqdm",
"voluptuous",
"whoosh",
"wrapt",
Expand Down
124 changes: 124 additions & 0 deletions tests/wurst_extraction/extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from fixtures import test_bw2_database

from bw2data import extract_brightway_databases
from bw2data.tests import bw2test
import pytest


def test_extraction(test_bw2_database):
    """Extracting ``food`` yields exactly the two expected dataset documents."""

    def by_code(dataset):
        # Extraction order is not part of the contract, so compare sorted by code.
        return dataset["code"]

    expected = [
        {
            "classifications": [42],
            "code": "1",
            "comment": "Yep",
            "database": "food",
            "exchanges": [
                {
                    "name": "dinner",
                    "amount": 0.5,
                    "database": "food",
                    "loc": 0.5,
                    "location": "CH",
                    "product": None,
                    "production volume": 13,
                    "type": "technosphere",
                    "uncertainty type": 0,
                    "unit": "kg",
                },
                {
                    "name": "an emission",
                    "amount": 0.05,
                    "categories": ["things"],
                    "input": ("biosphere", "1"),
                    "database": "biosphere",
                    "location": None,
                    "product": "find me!",
                    "production volume": None,
                    "type": "biosphere",
                    "uncertainty type": 4,
                    "unit": "kg",
                },
            ],
            "location": "CA",
            "name": "lunch",
            "reference product": "stuff",
            "unit": "kg",
            "parameters": {"losses_gross_net": 0.01},
            "parameters full": [{"amount": 0.01, "name": "losses_gross_net"}],
        },
        {
            "classifications": [],
            "code": "2",
            "comment": "",
            "database": "food",
            "exchanges": [
                {
                    "name": "lunch",
                    "amount": 0.25,
                    "location": "CA",
                    "product": "stuff",
                    "database": "food",
                    "production volume": None,
                    "type": "technosphere",
                    "uncertainty type": 0,
                    "unit": "kg",
                },
                {
                    "name": "another emission",
                    "amount": 0.15,
                    "categories": ["things"],
                    "input": ("biosphere", "2"),
                    "database": "biosphere",
                    "location": None,
                    "product": None,
                    "production volume": None,
                    "type": "biosphere",
                    "uncertainty type": 0,
                    "unit": "kg",
                },
            ],
            "location": "CH",
            "name": "dinner",
            "reference product": None,
            "unit": "kg",
            "parameters": {"rara": 13},
            "parameters full": [
                {"name": "rara", "amount": 13, "something": "else"}
            ],
        },
    ]

    extracted = extract_brightway_databases("food")
    assert sorted(extracted, key=by_code) == sorted(expected, key=by_code)


@bw2test
def test_extraction_missing_database():
    """Extracting a database that was never written ends in ``AssertionError``."""
    # NOTE(review): because the result is asserted inside ``pytest.raises``,
    # a falsy return value (e.g. an empty list) also satisfies this test.
    # Confirm that the AssertionError really originates in
    # ``extract_brightway_databases`` rather than in this ``assert``.
    with pytest.raises(AssertionError):
        extracted = extract_brightway_databases("biosphere3")
        assert extracted


def test_extraction_input_formats(test_bw2_database):
    """A name may be given as ``str``, ``list``, ``tuple`` or ``set``; a ``dict`` is rejected."""
    assert extract_brightway_databases("food")
    assert extract_brightway_databases(["food"])
    assert extract_brightway_databases(("food",))
    assert extract_brightway_databases({"food"})
    # No ``assert`` on the call here: the function itself must raise. Asserting
    # the result would also pass if the call silently returned something falsy.
    with pytest.raises(AssertionError):
        extract_brightway_databases({"food": None})


def test_extraction_with_properties(test_bw2_database):
    """``add_properties`` controls whether exchanges carry a ``"properties"`` key.

    The ``test_bw2_database`` fixture is requested so the ``food`` database
    exists for this test instead of relying on state left by other tests.
    """
    data = extract_brightway_databases("food")
    # Guard against a vacuous pass: ``all()`` over an empty extraction is True.
    assert data
    assert all("properties" not in exc for ds in data for exc in ds["exchanges"])
    data = extract_brightway_databases("food", add_properties=True)
    assert data
    assert all("properties" in exc for ds in data for exc in ds["exchanges"])


def test_extraction_with_identifiers(test_bw2_database):
    """``add_identifiers`` controls the ``"id"``/``"code"`` keys on datasets and exchanges.

    The ``test_bw2_database`` fixture is requested so the ``food`` database
    exists for this test instead of relying on state left by other tests.
    """
    data = extract_brightway_databases("food")
    # Guard against a vacuous pass: ``all()`` over an empty extraction is True.
    assert data
    # Baseline: without ``add_identifiers`` none of the identifier keys appear.
    assert all("id" not in ds for ds in data)
    assert all("id" not in exc for ds in data for exc in ds["exchanges"])
    assert all("code" not in exc for ds in data for exc in ds["exchanges"])
    data = extract_brightway_databases("food", add_identifiers=True)
    assert data
    assert all("id" in ds for ds in data)
    assert all("id" in exc for ds in data for exc in ds["exchanges"])
    assert all("code" in exc for ds in data for exc in ds["exchanges"])
Loading

0 comments on commit 55f1071

Please sign in to comment.