Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2482 privacy requests approved in quick succession error when not running worker #2489

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ The types of changes are:
* Patch masking strategies to better handle null and non-string inputs [#2307](https://github.com/ethyca/fides/pull/2377)
* Renamed prod pushes tag to be `latest` for privacy center and sample app [#2401](https://github.com/ethyca/fides/pull/2407)
* Update firebase connector to better handle non-existent users [#2439](https://github.com/ethyca/fides/pull/2439)
* Fix errors when privacy requests execute concurrently without workers [#2489](https://github.com/ethyca/fides/pull/2489)
* Enable saas request overrides to run in worker runtime [#2489](https://github.com/ethyca/fides/pull/2489)


## [2.5.1](https://github.com/ethyca/fides/compare/2.5.0...2.5.1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def dispatch_message_task(
A wrapper function to dispatch a message task into the Celery queues
"""
schema = FidesopsMessage.parse_obj(message_meta)
with self.session as db:
with self.get_new_session() as db:
dispatch_message(
db,
schema.action_type,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ async def run_privacy_request(
if from_step:
logger.info("Resuming privacy request from checkpoint: '{}'", from_step)

with self.session as session:
with self.get_new_session() as session:
privacy_request = PrivacyRequest.get(db=session, object_id=privacy_request_id)

privacy_request.cache_failed_checkpoint_details() # Reset failed step and collection to None
Expand Down
51 changes: 23 additions & 28 deletions src/fides/api/ops/tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,40 @@
from typing import Any, ContextManager, Dict, List, MutableMapping, Optional, Union
from typing import Any, ContextManager, Dict, List, Optional

from celery import Celery, Task
from loguru import logger
from sqlalchemy.orm import Session
from toml import load as load_toml

from fides.core.config import FidesConfig, get_config
from fides.lib.db.session import get_db_session
from fides.lib.db.session import get_db_engine, get_db_session

CONFIG = get_config()
MESSAGING_QUEUE_NAME = "fidesops.messaging"


class DatabaseTask(Task): # pylint: disable=W0223
_session = None
_task_engine = None
_sessionmaker = None

@property
def session(self) -> ContextManager[Session]:
"""Creates Session once per process"""
if self._session is None:
SessionLocal = get_db_session(CONFIG)
self._session = SessionLocal()
def get_new_session(self) -> ContextManager[Session]:
"""
Creates a new Session to be used for each task invocation.

return self._session
The new Sessions will reuse a shared `Engine` and `sessionmaker`
across invocations, so as to reuse db connection resources.
"""
# only one engine will be instantiated in a given task scope, i.e
# once per celery process.
if self._task_engine is None:
_task_engine = get_db_engine(config=CONFIG)

# same for the sessionmaker
if self._sessionmaker is None:
self._sessionmaker = get_db_session(config=CONFIG, engine=_task_engine)

# but a new session is instantiated each time the method is invoked
# to prevent session overlap when requests are executing concurrently
# when in task_always_eager mode (i.e. without proper workers)
return self._sessionmaker()


def _create_celery(config: FidesConfig = get_config()) -> Celery:
Expand Down Expand Up @@ -74,20 +86,3 @@ def get_worker_ids() -> List[Optional[str]]:
logger.critical(exception)
connected_workers = []
return connected_workers


def start_worker() -> None:
logger.info("Running Celery worker...")
default_queue_name = celery_app.conf.get("task_default_queue", "celery")
celery_app.worker_main(
argv=[
"worker",
"--loglevel=info",
"--concurrency=2",
f"--queues={default_queue_name},{MESSAGING_QUEUE_NAME}",
]
)


if __name__ == "__main__": # pragma: no cover
start_worker()
21 changes: 21 additions & 0 deletions src/fides/api/ops/worker/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from loguru import logger

from fides.api.ops.service.saas_request.override_implementations import *
from fides.api.ops.tasks import MESSAGING_QUEUE_NAME, celery_app


def start_worker() -> None:
logger.info("Running Celery worker...")
default_queue_name = celery_app.conf.get("task_default_queue", "celery")
celery_app.worker_main(
argv=[
"worker",
"--loglevel=info",
"--concurrency=2",
f"--queues={default_queue_name},{MESSAGING_QUEUE_NAME}",
]
)


if __name__ == "__main__": # pragma: no cover
start_worker()
2 changes: 1 addition & 1 deletion src/fides/cli/commands/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def worker(ctx: click.Context) -> None:
Starts a celery worker.
"""
# This has to be here to avoid a circular dependency
from fides.api.ops.tasks import start_worker
from fides.api.ops.worker import start_worker

start_worker()

Expand Down
11 changes: 11 additions & 0 deletions tests/ctl/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,17 @@ def test_webserver() -> None:
assert True


@pytest.mark.unit
def test_worker() -> None:
"""
This is specifically meant to catch when the worker command breaks,
without spinning up an additional instance.
"""
from fides.api.ops.worker import start_worker # pylint: disable=unused-import

assert True


@pytest.mark.unit
def test_parse(test_config_path: str, test_cli_runner: CliRunner) -> None:
result = test_cli_runner.invoke(
Expand Down