Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add output to loggers option #297

Merged
merged 5 commits into from
Apr 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ repos:
- id: isort
args: ["--filter-files"]
- repo: https://github.com/psf/black
rev: 21.4b1
rev: 22.3.0
hooks:
- id: black
args: [--safe]
Expand Down
69 changes: 69 additions & 0 deletions Collecting emissions to a logger.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Collecting emissions to a logger

The `LoggerOutput` class (and `GoogleCloudLoggerOutput` subclass) allows to send emissions tracking to a logger.
This is a specific, distinct logger than the one used by the Code Carbon package for its 'private' logs.
It allows to leverage powerful logging systems, to centralize emissions to some central or cloud-based system, and build reports, triggers, etc. based on these data.

This logging output can be used in parallel of other output options provided by Code Carbon.


## Create a logger

In order to send emissions tracking data to the logger, first create a logger and then create an `EmissionTracker`. `OfflineEmissionTracker` is also supported but lack of network connectivity may forbid to stream tracking data to some central or cloud-based collector.

### Python logger

```python
import logging


# Create a dedicated logger (log name can be the Code Carbon project name for example)
_logger = logging.getLogger(log_name)

# Add a handler, see Python logging for various handlers (here a local file named after log_name)
_channel = logging.FileHandler(log_name + '.log')
_logger.addHandler(_channel)

# Set logging level from DEBUG to CRITICAL (typically INFO)
# This level can be used in the logging process to filter emissions messages
_logger.setLevel(logging.INFO)

# Create a Code Carbon LoggerOutput with the logger, specifying the logging level to be used for emissions data messages
my_logger = LoggerOutput(_logger, logging.INFO)
```


### Google Cloud Logging

```python
import google.cloud.logging


# Create a Cloud Logging client (specify project name if needed, otherwise Google SDK default project name is used)
client = google.cloud.logging.Client(project=google_project_name)

# Create a Code Carbon GoogleCloudLoggerOutput with the Cloud Logging logger, with the logging level to be used for emissions data messages
my_logger = GoogleCloudLoggerOutput(client.logger(log_name))
```

#### Authentication

Please refer to Google Cloud documentation [here](https://cloud.google.com/logging/docs/reference/libraries#setting_up_authentication).


### Create an EmissionTracker

Create an EmissionTracker saving output to the logger. Other save options are still usable and valid.

```python
tracker = EmissionsTracker(save_to_logger=True, logging_logger=my_logger)
tracker.start()
...
emissions: float = tracker.stop()
...
```

### Example

A demonstration is available in `examples/logging_demo.py`.

23 changes: 23 additions & 0 deletions codecarbon/emissions_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
EmissionsData,
FileOutput,
HTTPOutput,
LoggerOutput,
)

# /!\ Warning: current implementation prevents the user from setting any value to None
Expand Down Expand Up @@ -142,6 +143,8 @@ def __init__(
output_file: Optional[str] = _sentinel,
save_to_file: Optional[bool] = _sentinel,
save_to_api: Optional[bool] = _sentinel,
save_to_logger: Optional[bool] = _sentinel,
logging_logger: Optional[LoggerOutput] = _sentinel,
gpu_ids: Optional[List] = _sentinel,
emissions_endpoint: Optional[str] = _sentinel,
experiment_id: Optional[str] = _sentinel,
Expand Down Expand Up @@ -170,6 +173,10 @@ def __init__(
file, defaults to True
:param save_to_api: Indicates if the emission artifacts should be send to the
CodeCarbon API, defaults to False
:param save_to_logger: Indicates if the emission artifacts should be written
to a dedicated logger, defaults to False
:param logging_logger: LoggerOutput object encapsulating a logging.logger
or a Google Cloud logger
:param gpu_ids: User-specified known gpu ids to track, defaults to None
:param emissions_endpoint: Optional URL of http endpoint for sending emissions
data
Expand Down Expand Up @@ -206,6 +213,8 @@ def __init__(
self._set_from_conf(project_name, "project_name", "codecarbon")
self._set_from_conf(save_to_api, "save_to_api", False, bool)
self._set_from_conf(save_to_file, "save_to_file", True, bool)
self._set_from_conf(save_to_logger, "save_to_logger", False, bool)
self._set_from_conf(logging_logger, "logging_logger")
self._set_from_conf(tracking_mode, "tracking_mode", "machine")
self._set_from_conf(on_csv_write, "on_csv_write", "append")
self._set_from_conf(logger_preamble, "logger_preamble", "")
Expand Down Expand Up @@ -332,6 +341,9 @@ def __init__(
)
)

if self._save_to_logger:
self.persistence_objs.append(self._logging_logger)

if self._emissions_endpoint:
self.persistence_objs.append(HTTPOutput(emissions_endpoint))

Expand All @@ -347,6 +359,7 @@ def __init__(
)
self.run_id = self._cc_api__out.run_id
self.persistence_objs.append(self._cc_api__out)

else:
self.run_id = uuid.uuid4()

Expand Down Expand Up @@ -700,6 +713,8 @@ def track_emissions(
output_file: Optional[str] = _sentinel,
save_to_file: Optional[bool] = _sentinel,
save_to_api: Optional[bool] = _sentinel,
save_to_logger: Optional[bool] = _sentinel,
logging_logger: Optional[LoggerOutput] = _sentinel,
offline: Optional[bool] = _sentinel,
emissions_endpoint: Optional[str] = _sentinel,
experiment_id: Optional[str] = _sentinel,
Expand All @@ -726,6 +741,10 @@ def track_emissions(
defaults to True
:param save_to_api: Indicates if the emission artifacts should be send to the
CodeCarbon API, defaults to False
:param save_to_logger: Indicates if the emission artifacts should be written
to a dedicated logger, defaults to False
:param logging_logger: LoggerOutput object encapsulating a logging.logger
or a Google Cloud logger
:param offline: Indicates if the tracker should be run in offline mode
:param country_iso_code: 3 letter ISO Code of the country where the experiment is
being run, required if `offline=True`
Expand Down Expand Up @@ -763,6 +782,8 @@ def wrapped_fn(*args, **kwargs):
output_dir=output_dir,
output_file=output_file,
save_to_file=save_to_file,
save_to_logger=save_to_logger,
logging_logger=logging_logger,
country_iso_code=country_iso_code,
region=region,
cloud_provider=cloud_provider,
Expand All @@ -781,6 +802,8 @@ def wrapped_fn(*args, **kwargs):
output_dir=output_dir,
output_file=output_file,
save_to_file=save_to_file,
save_to_logger=save_to_logger,
logging_logger=logging_logger,
gpu_ids=gpu_ids,
log_level=log_level,
emissions_endpoint=emissions_endpoint,
Expand Down
32 changes: 32 additions & 0 deletions codecarbon/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import csv
import dataclasses
import getpass
import json
import logging
import os
from abc import ABC, abstractmethod
from collections import OrderedDict
Expand Down Expand Up @@ -181,3 +183,33 @@ def out(self, data: EmissionsData):
self.api.add_emission(dataclasses.asdict(data))
except Exception as e:
logger.error(e, exc_info=True)


class LoggerOutput(BaseOutput):
"""
Send emissions data to a logger
"""

def __init__(self, logger, severity=logging.INFO):
self.logger = logger
self.logging_severity = severity

def out(self, data: EmissionsData):
try:
payload = dataclasses.asdict(data)
self.logger.log(self.logging_severity, msg=json.dumps(payload))
except Exception as e:
logger.error(e, exc_info=True)


class GoogleCloudLoggerOutput(LoggerOutput):
"""
Send emissions data to GCP Cloud Logging
"""

def out(self, data: EmissionsData):
try:
payload = dataclasses.asdict(data)
self.logger.log_struct(payload, severity=self.logging_severity)
except Exception as e:
logger.error(e, exc_info=True)
48 changes: 48 additions & 0 deletions examples/logging_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import argparse
import logging
import time

import google.cloud.logging

from codecarbon import EmissionsTracker
from codecarbon.output import GoogleCloudLoggerOutput, LoggerOutput


def train_model(epochs: int):
"""
This function will do nothing during (occurrence * delay) seconds.
The Code Carbon API will be called every (measure_power_secs * api_call_interval)
seconds.
"""
occurrence = epochs # 60 * 24
delay = 30 # Seconds
for i in range(occurrence):
print(f"{occurrence * delay - i * delay} seconds before ending script...")
time.sleep(delay)


if __name__ == "__main__":

parser = argparse.ArgumentParser()
parser.add_argument(
"--google-project",
help="Specify the name of the Google project (for Cloud Logging)",
)
args = parser.parse_args()

log_name = "code_carbone"
if args.google_project:
client = google.cloud.logging.Client(project=args.google_project)
external_logger = GoogleCloudLoggerOutput(client.logger(log_name))
else:
external_logger = logging.getLogger(log_name)
channel = logging.FileHandler(log_name + ".log")
external_logger.addHandler(channel)
external_logger.setLevel(logging.INFO)
external_logger = LoggerOutput(external_logger, logging.INFO)

tracker = EmissionsTracker(save_to_logger=True, logging_logger=external_logger)
tracker.start()
train_model(epochs=1) # Each epoch last 30 secondes
emissions: float = tracker.stop()
print(f"Emissions: {emissions} kg")
101 changes: 101 additions & 0 deletions tests/test_logging_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import json
import logging
import os
import tempfile
import time
import unittest

from codecarbon.emissions_tracker import (
EmissionsTracker,
OfflineEmissionsTracker,
track_emissions,
)
from codecarbon.output import LoggerOutput


def heavy_computation(run_time_secs: int = 3):
end_time: float = time.time() + run_time_secs # Run for `run_time_secs` seconds
while time.time() < end_time:
pass


class TestCarbonTrackerFlush(unittest.TestCase):
def setUp(self) -> None:
self.project_name = "project_TestCarbonLoggingOutput"
self.emissions_logfile = "emissions-test-TestCarbonLoggingOutput.log"
self.emissions_path = tempfile.gettempdir()
self.emissions_file_path = os.path.join(
self.emissions_path, self.emissions_logfile
)
if os.path.isfile(self.emissions_file_path):
os.remove(self.emissions_file_path)
self._test_logger = logging.getLogger(self.project_name)
_channel = logging.FileHandler(self.emissions_file_path)
self._test_logger.addHandler(_channel)
self._test_logger.setLevel(logging.INFO)
self.external_logger = LoggerOutput(self._test_logger, logging.INFO)

def tearDown(self) -> None:
for handler in self._test_logger.handlers[:]:
self._test_logger.removeHandler(handler)
handler.close()
if os.path.isfile(self.emissions_file_path):
os.remove(self.emissions_file_path)

def test_carbon_tracker_online_logging_output(self):
tracker = EmissionsTracker(
project_name=self.project_name,
save_to_logger=True,
logging_logger=self.external_logger,
)
tracker.start()
heavy_computation(run_time_secs=1)
# tracker.flush()
# heavy_computation(run_time_secs=1)
emissions = tracker.stop()
assert isinstance(emissions, float)
self.assertNotEqual(emissions, 0.0)
self.assertAlmostEqual(emissions, 6.262572537957655e-05, places=2)
self.verify_logging_output(self.emissions_file_path)

def test_carbon_tracker_offline_logging_output(self):
tracker = OfflineEmissionsTracker(
project_name=self.project_name,
country_iso_code="USA",
save_to_logger=True,
logging_logger=self.external_logger,
)
tracker.start()
heavy_computation(run_time_secs=1)
# tracker.flush()
# heavy_computation(run_time_secs=1)
emissions = tracker.stop()
assert isinstance(emissions, float)
self.assertNotEqual(emissions, 0.0)
self.assertAlmostEqual(emissions, 6.262572537957655e-05, places=2)
self.verify_logging_output(self.emissions_file_path)

def test_decorator_flush(self):
@track_emissions(
project_name=self.project_name,
save_to_logger=True,
logging_logger=self.external_logger,
)
def dummy_train_model():
heavy_computation(run_time_secs=1)
# I don't know how to call flush() in decorator mode
return 42

res = dummy_train_model()
self.assertEqual(res, 42)

self.verify_logging_output(self.emissions_file_path, 1)

def verify_logging_output(self, file_path: str, expected_lines=1) -> None:
with open(file_path, "r") as f:
lines = [line.rstrip() for line in f]
assert len(lines) == expected_lines
results = json.loads(lines[0])
self.assertEqual(results["project_name"], self.project_name)
self.assertNotEqual(results["emissions"], 0.0)
self.assertAlmostEqual(results["emissions"], 6.262572537957655e-05, places=2)