-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move wurst extraction functions to bw2data
- Loading branch information
Showing
9 changed files
with
388 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
from tqdm import tqdm | ||
import copy | ||
|
||
from ..database import DatabaseChooser | ||
from . import SQLiteBackend, ActivityDataset, ExchangeDataset | ||
|
||
|
||
def _list_or_dict(obj):
    """Iterate over parameter records stored either as a dict or as a sequence.

    When ``obj`` is a ``dict``, every value is deep-copied and the copy gets
    its key stored under ``"name"`` before being yielded, so the stored data
    is left untouched. Any other iterable has its items yielded as they are.
    """
    if not isinstance(obj, dict):
        yield from obj
        return

    for label, record in obj.items():
        clone = copy.deepcopy(record)
        clone["name"] = label
        yield clone
|
||
|
||
def extract_activity(proxy, add_identifiers=False):
    """Get data in Wurst internal format for an ``ActivityDataset``.

    Args:
        proxy: The ``ActivityDataset`` row to convert.
        add_identifiers: If true, also store the row's ``id`` under ``"id"``.

    Returns:
        A dict with the activity metadata, an empty ``"exchanges"`` list, a
        ``"parameters"`` mapping of parameter name to amount, and the complete
        parameter records under ``"parameters full"``.

    Raises:
        AssertionError: If ``proxy`` is not an ``ActivityDataset``.
        KeyError: If a parameter record lacks ``"name"`` or ``"amount"``.
    """
    assert isinstance(proxy, ActivityDataset)

    # Expand the stored parameters a single time; the name -> amount mapping
    # is derived from the same records instead of copying everything twice.
    parameters_full = list(_list_or_dict(proxy.data.get("parameters", [])))

    obj = {
        "classifications": proxy.data.get("classifications", []),
        "comment": proxy.data.get("comment", ""),
        "location": proxy.location,
        "database": proxy.database,
        "code": proxy.code,
        "name": proxy.name,
        "reference product": proxy.product,
        "unit": proxy.data.get("unit", ""),
        "exchanges": [],
        "parameters": {
            param["name"]: param["amount"] for param in parameters_full
        },
        "parameters full": parameters_full,
    }
    if add_identifiers:
        obj["id"] = proxy.id
    return obj
|
||
|
||
def extract_exchange(proxy, add_properties=False):
    """Convert an ``ExchangeDataset`` row into a Wurst internal format dict.

    Only the amount and uncertainty related keys are copied from the stored
    data. An exchange without an ``"uncertainty type"`` gets type ``0`` and a
    ``"loc"`` equal to its amount. The ``"input"`` and ``"output"`` entries
    are ``(database, code)`` tuples; ``"properties"`` is only included when
    ``add_properties`` is true.

    Raises ``AssertionError`` if ``proxy`` is not an ``ExchangeDataset`` or
    its data has no ``"amount"``.
    """
    assert isinstance(proxy, ExchangeDataset)

    stored = proxy.data
    data = {}
    for field in (
        "uncertainty type",
        "loc",
        "scale",
        "shape",
        "minimum",
        "maximum",
        "amount",
        "pedigree",
    ):
        if field in stored:
            data[field] = stored[field]

    assert "amount" in data, "Exchange has no `amount` field"

    if "uncertainty type" not in data:
        # No uncertainty information stored: fall back to type 0 with the
        # amount itself as ``loc``.
        data.update({"uncertainty type": 0, "loc": data["amount"]})

    data.update(
        {
            "type": proxy.type,
            "production volume": stored.get("production volume"),
            "input": (proxy.input_database, proxy.input_code),
            "output": (proxy.output_database, proxy.output_code),
        }
    )
    if add_properties:
        data["properties"] = stored.get("properties", {})
    return data
|
||
|
||
def add_exchanges_to_consumers(activities, exchange_qs, add_properties=False, add_identifiers=False):
    """Retrieve exchanges from database, and add to activities.

    Assumes that activities are single output, and that the exchange code is
    the same as the activity code. This assumption is valid for ecoinvent 3.3
    cutoff imported into Brightway2.

    Args:
        activities: Dataset dicts, each with ``"database"``, ``"code"`` and an
            ``"exchanges"`` list.
        exchange_qs: Query over ``ExchangeDataset`` rows; it must support
            ``count()`` and iteration.
        add_properties: Forwarded to ``extract_exchange``.
        add_identifiers: Currently unused by this function.

    Returns:
        The ``activities`` object that was passed in; the exchanges are
        appended to its datasets in place.

    Raises:
        KeyError: If an exchange's output is not one of ``activities``.
    """
    lookup = {(o["database"], o["code"]): o for o in activities}

    with tqdm(total=exchange_qs.count()) as pbar:
        for row in exchange_qs:
            exc = extract_exchange(row, add_properties=add_properties)
            # "output" only serves to find the consuming activity, so it is
            # removed from the exchange before the exchange is stored.
            consumer_key = tuple(exc.pop("output"))
            lookup[consumer_key]["exchanges"].append(exc)
            pbar.update(1)
    return activities
|
||
|
||
def add_input_info_for_indigenous_exchanges(activities, names, add_identifiers=False):
    """Add details on exchange inputs if these activities are already available.

    Exchanges whose input database is one of ``names`` are filled in from the
    matching dataset in ``activities`` and their ``"input"`` key is removed.
    Exchanges without an ``"input"`` key, or pointing to another database,
    are left untouched.

    Args:
        activities: Dataset dicts, each with ``"database"``, ``"code"`` and an
            ``"exchanges"`` list. Modified in place.
        names: Iterable of database names considered available.
        add_identifiers: If true, also copy ``"id"`` and ``"code"`` from the
            input dataset onto the exchange.

    Returns:
        None.

    Raises:
        KeyError: If an exchange refers to a dataset that is not in
            ``activities``, or if ``add_identifiers`` is true and the input
            dataset has no ``"id"``.
    """
    names = set(names)
    lookup = {(o["database"], o["code"]): o for o in activities}

    for ds in activities:
        for exc in ds["exchanges"]:
            if "input" not in exc or exc["input"][0] not in names:
                continue
            # Normalise to a tuple so that list-valued inputs can be looked
            # up too, as is done for "output" in add_exchanges_to_consumers.
            obj = lookup[tuple(exc["input"])]
            exc["product"] = obj.get("reference product")
            exc["name"] = obj.get("name")
            exc["unit"] = obj.get("unit")
            exc["location"] = obj.get("location")
            exc["database"] = obj.get("database")
            if add_identifiers:
                exc["id"] = obj["id"]
                exc["code"] = obj["code"]
            if exc["type"] == "biosphere":
                exc["categories"] = obj.get("categories")
            exc.pop("input")
|
||
|
||
def add_input_info_for_external_exchanges(activities, names, add_identifiers=False):
    """Fill in exchange details for inputs that live in other databases.

    Every exchange that has an ``"input"`` whose database is not in ``names``
    is completed with name, product, unit, location and database of the
    referenced ``ActivityDataset`` row (plus ``"id"`` and ``"code"`` when
    ``add_identifiers`` is true, and ``"categories"`` for biosphere
    exchanges). The ``"input"`` key is kept. Rows are fetched from the
    database once per distinct input and then served from a local cache.
    """
    own_databases = set(names)
    fetched = {}

    for ds in tqdm(activities):
        for exc in ds["exchanges"]:
            is_external = "input" in exc and exc["input"][0] not in own_databases
            if not is_external:
                continue

            key = exc["input"]
            if key not in fetched:
                fetched[key] = ActivityDataset.get(
                    ActivityDataset.database == key[0],
                    ActivityDataset.code == key[1],
                )
            row = fetched[key]

            exc.update(
                {
                    "name": row.name,
                    "product": row.product,
                    "unit": row.data.get("unit"),
                    "location": row.location,
                    "database": row.database,
                }
            )
            if add_identifiers:
                exc["id"] = row.id
                exc["code"] = row.code
            if exc["type"] == "biosphere":
                exc["categories"] = row.data.get("categories")
|
||
|
||
def extract_brightway_databases(database_names, add_properties=False, add_identifiers=False):
    """Extract Brightway2 SQLiteBackend databases to the Wurst internal format.

    ``database_names`` is a database name or a list, tuple or set of names.
    You should already be in the correct project.

    Returns a list of dataset documents. Raises ``AssertionError`` when
    ``database_names`` has another type or when one of the databases is not
    a ``SQLiteBackend``.
    """
    if isinstance(database_names, str):
        database_names = [database_names]
    assert isinstance(
        database_names, (list, tuple, set)
    ), "Must pass list of database names"

    backends = [DatabaseChooser(name) for name in database_names]
    assert all(
        isinstance(backend, SQLiteBackend) for backend in backends
    ), "Wrong type of database object (must be SQLiteBackend)"

    # Both queries are lazy; rows are only materialised while iterating,
    # which keeps copying and memory use down.
    activity_qs = ActivityDataset.select().where(
        ActivityDataset.database << database_names
    )
    exchange_qs = ExchangeDataset.select().where(
        ExchangeDataset.output_database << database_names
    )

    # Step 1: one document per activity
    print("Getting activity data")
    activities = []
    for row in tqdm(activity_qs):
        activities.append(extract_activity(row, add_identifiers=add_identifiers))

    # Step 2: attach every exchange to the activity that consumes it
    print("Adding exchange data to activities")
    add_exchanges_to_consumers(activities, exchange_qs, add_properties)

    # Step 3: describe exchange inputs, first those from the extracted
    # databases, then those from any other database
    print("Filling out exchange data")
    add_input_info_for_indigenous_exchanges(
        activities, database_names, add_identifiers=add_identifiers
    )
    add_input_info_for_external_exchanges(
        activities, database_names, add_identifiers=add_identifiers
    )
    return activities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ python-coveralls | |
requests>=1.1.0 | ||
scipy | ||
stats_arrays | ||
tqdm | ||
voluptuous | ||
whoosh | ||
wrapt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ pyprind | |
requests>=1.1.0 | ||
scipy | ||
stats_arrays | ||
tqdm | ||
voluptuous | ||
whoosh | ||
wrapt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,7 @@ | |
"requests>=1.1.0", | ||
"scipy", | ||
"stats_arrays", | ||
"tqdm", | ||
"voluptuous", | ||
"whoosh", | ||
"wrapt", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
from fixtures import test_bw2_database | ||
|
||
from bw2data import extract_brightway_databases | ||
from bw2data.tests import bw2test | ||
import pytest | ||
|
||
|
||
def test_extraction(test_bw2_database):
    """The "food" database is extracted to the expected Wurst documents."""

    def by_code(dataset):
        return dataset["code"]

    lunch = {
        "classifications": [42],
        "code": "1",
        "comment": "Yep",
        "database": "food",
        "exchanges": [
            {
                "name": "dinner",
                "amount": 0.5,
                "database": "food",
                "loc": 0.5,
                "location": "CH",
                "product": None,
                "production volume": 13,
                "type": "technosphere",
                "uncertainty type": 0,
                "unit": "kg",
            },
            {
                "name": "an emission",
                "amount": 0.05,
                "categories": ["things"],
                "input": ("biosphere", "1"),
                "database": "biosphere",
                "location": None,
                "product": "find me!",
                "production volume": None,
                "type": "biosphere",
                "uncertainty type": 4,
                "unit": "kg",
            },
        ],
        "location": "CA",
        "name": "lunch",
        "reference product": "stuff",
        "unit": "kg",
        "parameters": {"losses_gross_net": 0.01},
        "parameters full": [{"amount": 0.01, "name": "losses_gross_net"}],
    }
    dinner = {
        "classifications": [],
        "code": "2",
        "comment": "",
        "database": "food",
        "exchanges": [
            {
                "name": "lunch",
                "amount": 0.25,
                "location": "CA",
                "product": "stuff",
                "database": "food",
                "production volume": None,
                "type": "technosphere",
                "uncertainty type": 0,
                "unit": "kg",
            },
            {
                "name": "another emission",
                "amount": 0.15,
                "categories": ["things"],
                "input": ("biosphere", "2"),
                "database": "biosphere",
                "location": None,
                "product": None,
                "production volume": None,
                "type": "biosphere",
                "uncertainty type": 0,
                "unit": "kg",
            },
        ],
        "location": "CH",
        "name": "dinner",
        "reference product": None,
        "unit": "kg",
        "parameters": {"rara": 13},
        "parameters full": [
            {"name": "rara", "amount": 13, "something": "else"}
        ],
    }
    expected = [lunch, dinner]

    # Compare independently of the order in which datasets are returned.
    extracted = extract_brightway_databases("food")
    assert sorted(extracted, key=by_code) == sorted(expected, key=by_code)
|
||
|
||
@bw2test
def test_extraction_missing_database():
    """Extracting "biosphere3" in this test setup ends in an AssertionError."""
    missing = "biosphere3"
    with pytest.raises(AssertionError):
        # The ``assert`` is kept inside the block on purpose: the expected
        # error may come from the call or from asserting on its result.
        assert extract_brightway_databases(missing)
|
||
|
||
def test_extraction_input_formats(test_bw2_database):
    """A name, list, tuple or set of names is accepted; a dict is rejected."""
    accepted_forms = ("food", ["food"], ("food",), {"food"})
    for database_names in accepted_forms:
        assert extract_brightway_databases(database_names)

    with pytest.raises(AssertionError):
        assert extract_brightway_databases({"food": None})
|
||
|
||
def test_extraction_with_properties(test_bw2_database):
    """Exchanges only carry ``"properties"`` when ``add_properties`` is set.

    The ``test_bw2_database`` fixture is requested so that the "food"
    database is available, as in the other extraction tests, instead of
    relying on state left behind by a previous test.
    """
    data = extract_brightway_databases("food")
    assert all("properties" not in exc for ds in data for exc in ds["exchanges"])
    data = extract_brightway_databases("food", add_properties=True)
    assert all("properties" in exc for ds in data for exc in ds["exchanges"])
|
||
|
||
def test_extraction_with_identifiers(test_bw2_database):
    """Identifiers are only present when ``add_identifiers`` is set.

    The ``test_bw2_database`` fixture is requested so that the "food"
    database is available, as in the other extraction tests, instead of
    relying on state left behind by a previous test.
    """
    # Baseline: a default extraction must not expose identifiers.
    data = extract_brightway_databases("food")
    assert all("id" not in ds for ds in data)
    assert all("id" not in exc for ds in data for exc in ds["exchanges"])

    data = extract_brightway_databases("food", add_identifiers=True)
    assert all("id" in ds for ds in data)
    assert all("id" in exc for ds in data for exc in ds["exchanges"])
    assert all("code" in exc for ds in data for exc in ds["exchanges"])
Oops, something went wrong.