|
| 1 | +import argparse |
| 2 | +import asyncio |
| 3 | +import concurrent.futures |
| 4 | +import dataclasses |
| 5 | +import multiprocessing |
| 6 | +import traceback |
| 7 | +from typing import Literal |
| 8 | + |
| 9 | +from temporalio.client import Client |
| 10 | +from temporalio.envconfig import ClientConfig |
| 11 | +from temporalio.runtime import Runtime, TelemetryConfig |
| 12 | +from temporalio.worker import PollerBehaviorSimpleMaximum, Worker |
| 13 | +from temporalio.worker.workflow_sandbox import ( |
| 14 | + SandboxedWorkflowRunner, |
| 15 | + SandboxRestrictions, |
| 16 | +) |
| 17 | + |
| 18 | +from worker_multiprocessing import ACTIVITY_TASK_QUEUE, WORKFLOW_TASK_QUEUE |
| 19 | +from worker_multiprocessing.activities import echo_pid_activity |
| 20 | +from worker_multiprocessing.workflows import ParallelizedWorkflow |
| 21 | + |
# Immediately prevent the default Runtime from being created to ensure
# each process creates its own
Runtime.prevent_default()
| 25 | + |
| 26 | + |
class Args(argparse.Namespace):
    """Typed argparse namespace for the worker-launcher CLI flags."""

    num_workflow_workers: int
    num_activity_workers: int

    @property
    def total_workers(self) -> int:
        """Total number of worker processes that will be spawned."""
        return self.num_workflow_workers + self.num_activity_workers
| 34 | + |
| 35 | + |
def main():
    """Parse CLI flags and fan worker processes out over a process pool."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--num-workflow-workers", type=int, default=2)
    parser.add_argument("-a", "--num-activity-workers", type=int, default=1)
    args = parser.parse_args(namespace=Args())
    print(
        f"starting {args.num_workflow_workers} workflow worker(s) and {args.num_activity_workers} activity worker(s)"
    )

    # Prefer 'fork' to avoid re-importing modules and to cut startup time.
    # Fork is not available on every operating system, so fall back to
    # 'spawn' when the platform rejects it.
    try:
        mp_ctx = multiprocessing.get_context("fork")
    except ValueError:
        mp_ctx = multiprocessing.get_context("spawn")  # type: ignore

    with concurrent.futures.ProcessPoolExecutor(
        args.total_workers, mp_context=mp_ctx
    ) as executor:
        # Workflow workers first, then activity workers, each as a child
        # process of the pool. In this sample both kinds are launched the
        # same way; in production, activity workers are often deployed
        # separately from workflow workers to account for their differing
        # scaling characteristics.
        launch_plan = [
            ("workflow", args.num_workflow_workers),
            ("activity", args.num_activity_workers),
        ]
        worker_futures = [
            executor.submit(worker_entry, kind, index)
            for kind, count in launch_plan
            for index in range(count)
        ]

        try:
            print("waiting for keyboard interrupt or for all workers to exit")
            # Workers are expected to run until interrupted, so any future
            # completing at all is treated as an abnormal exit.
            for future in concurrent.futures.as_completed(worker_futures):
                print("ERROR: worker exited unexpectedly")
                exc = future.exception()
                if exc:
                    traceback.print_exception(exc)
        except KeyboardInterrupt:
            pass
| 82 | + |
| 83 | + |
def worker_entry(worker_type: Literal["workflow", "activity"], id: int) -> None:
    """
    Entry point executed inside each child process.

    Creates a per-process Temporal Runtime, connects a client, and runs
    either a workflow or an activity worker until cancelled.

    Args:
        worker_type: Which kind of worker this process should run.
        id: Index of this worker within its kind, used only for log output.
    """
    # Each process must install its own Runtime; the module-level
    # Runtime.prevent_default() guarantees no default was created earlier.
    Runtime.set_default(Runtime(telemetry=TelemetryConfig()))

    async def run_worker():
        # Load connection settings from the environment-based config and
        # fall back to a local dev server when no target host is set.
        config = ClientConfig.load_client_connect_config()
        config.setdefault("target_host", "localhost:7233")
        client = await Client.connect(**config)

        if worker_type == "workflow":
            worker = workflow_worker(client)
        else:
            worker = activity_worker(client)

        try:
            print(f"{worker_type}-worker:{id} starting")
            # shield() keeps worker.run() itself from being cancelled when
            # asyncio.run() cancels this task (e.g. on KeyboardInterrupt),
            # so we can catch the CancelledError here and shut down cleanly.
            await asyncio.shield(worker.run())
        except asyncio.CancelledError:
            print(f"{worker_type}-worker:{id} shutting down")
            # Graceful shutdown: lets in-flight tasks finish per the
            # worker's shutdown semantics before the process exits.
            await worker.shutdown()

    asyncio.run(run_worker())
| 105 | + |
| 106 | + |
def workflow_worker(client: Client) -> Worker:
    """
    Create a workflow worker configured to run as one of many child
    processes.

    Workflow tasks are CPU bound but generally execute quickly. Because
    parallelism comes from multiprocessing rather than from a single large
    worker, each worker is configured for small workflow task processing
    (few concurrent tasks, a small poller maximum).
    """
    # Relax the sandbox so workflows may use the os module to read the pid.
    restrictions = dataclasses.replace(
        SandboxRestrictions.default,
        invalid_module_members=SandboxRestrictions.invalid_module_members_default.with_child_unrestricted(
            "os"
        ),
    )
    return Worker(
        client,
        task_queue=WORKFLOW_TASK_QUEUE,
        workflows=[ParallelizedWorkflow],
        max_concurrent_workflow_tasks=2,
        workflow_task_poller_behavior=PollerBehaviorSimpleMaximum(2),
        workflow_runner=SandboxedWorkflowRunner(restrictions=restrictions),
    )
| 132 | + |
| 133 | + |
def activity_worker(client: Client) -> Worker:
    """
    Create a basic activity worker serving the echo_pid activity.
    """
    return Worker(
        client,
        activities=[echo_pid_activity],
        task_queue=ACTIVITY_TASK_QUEUE,
    )
| 143 | + |
| 144 | + |
# Run only when executed directly, never on import — presumably important
# here because child processes may re-import this module under 'spawn';
# TODO confirm on a spawn-only platform.
if __name__ == "__main__":
    main()
0 commit comments