11from __future__ import annotations
22
3+ import os
4+ from multiprocessing import Barrier , Process , Value , synchronize
5+ from multiprocessing .shared_memory import SharedMemory
6+ from typing import Callable
7+
8+ import pytest
9+
310from crawlee ._utils .byte_size import ByteSize
411from crawlee ._utils .system import get_cpu_info , get_memory_info
512
@@ -14,3 +21,105 @@ def test_get_memory_info_returns_valid_values() -> None:
1421def test_get_cpu_info_returns_valid_values () -> None :
1522 cpu_info = get_cpu_info ()
1623 assert 0 <= cpu_info .used_ratio <= 1
24+
25+
26+ @pytest .mark .skipif (os .name == 'nt' , reason = 'Improved estimation not available on Windows' )
27+ def test_memory_estimation_does_not_overestimate_due_to_shared_memory () -> None :
28+ """Test that memory usage estimation is not overestimating memory usage by counting shared memory multiple times.
29+
30+ In this test, the parent process is started and its memory usage is measured in situations where it is running
31+ child processes without additional memory, with shared additional memory and with own unshared additional memory.
32+ Child process without additional memory are used to estimate baseline memory usage of any child process.
33+ The following estimation is asserted by the test:
34+ additional_memory_size_estimate_per_shared_memory_child * number_of_sharing_children_processes is approximately
35+ equal to additional_memory_size_estimate_per_unshared_memory_child where the additional shared memory is exactly
36+ the same as the unshared memory.
37+ """
38+ estimated_memory_expectation = Value ('b' , False ) # noqa: FBT003 # Common usage pattern for multiprocessing.Value
39+
40+ def parent_process () -> None :
41+ extra_memory_size = 1024 * 1024 * 100 # 100 MB
42+ children_count = 4
43+ # Memory calculation is not exact, so allow for some tolerance.
44+ test_tolerance = 0.1
45+
46+ def no_extra_memory_child (ready : synchronize .Barrier , measured : synchronize .Barrier ) -> None :
47+ ready .wait ()
48+ measured .wait ()
49+
50+ def extra_memory_child (ready : synchronize .Barrier , measured : synchronize .Barrier ) -> None :
51+ memory = SharedMemory (size = extra_memory_size , create = True )
52+ memory .buf [:] = bytearray ([255 for _ in range (extra_memory_size )])
53+ ready .wait ()
54+ measured .wait ()
55+ memory .close ()
56+ memory .unlink ()
57+
58+ def shared_extra_memory_child (
59+ ready : synchronize .Barrier , measured : synchronize .Barrier , memory : SharedMemory
60+ ) -> None :
61+ print (memory .buf [- 1 ])
62+ ready .wait ()
63+ measured .wait ()
64+
65+ def get_additional_memory_estimation_while_running_processes (
66+ * , target : Callable , count : int = 1 , use_shared_memory : bool = False
67+ ) -> float :
68+ processes = []
69+ ready = Barrier (parties = count + 1 )
70+ measured = Barrier (parties = count + 1 )
71+ shared_memory : None | SharedMemory = None
72+ memory_before = get_memory_info ().current_size
73+
74+ if use_shared_memory :
75+ shared_memory = SharedMemory (size = extra_memory_size , create = True )
76+ shared_memory .buf [:] = bytearray ([255 for _ in range (extra_memory_size )])
77+ extra_args = [shared_memory ]
78+ else :
79+ extra_args = []
80+
81+ for _ in range (count ):
82+ p = Process (target = target , args = [ready , measured , * extra_args ])
83+ p .start ()
84+ processes .append (p )
85+
86+ ready .wait ()
87+ memory_during = get_memory_info ().current_size
88+ measured .wait ()
89+
90+ for p in processes :
91+ p .join ()
92+
93+ if shared_memory :
94+ shared_memory .close ()
95+ shared_memory .unlink ()
96+
97+ return (memory_during - memory_before ).to_mb () / count
98+
99+ additional_memory_simple_child = get_additional_memory_estimation_while_running_processes (
100+ target = no_extra_memory_child , count = children_count
101+ )
102+ additional_memory_extra_memory_child = (
103+ get_additional_memory_estimation_while_running_processes (target = extra_memory_child , count = children_count )
104+ - additional_memory_simple_child
105+ )
106+ additional_memory_shared_extra_memory_child = (
107+ get_additional_memory_estimation_while_running_processes (
108+ target = shared_extra_memory_child , count = children_count , use_shared_memory = True
109+ )
110+ - additional_memory_simple_child
111+ )
112+
113+ estimated_memory_expectation .value = (
114+ abs ((additional_memory_shared_extra_memory_child * children_count ) - additional_memory_extra_memory_child )
115+ / additional_memory_extra_memory_child
116+ < test_tolerance
117+ )
118+
119+ process = Process (target = parent_process )
120+ process .start ()
121+ process .join ()
122+
123+ assert estimated_memory_expectation .value , (
124+ 'Estimated memory usage for process with shared memory does not meet the expectation.'
125+ )
0 commit comments