Description
Hi Bigcodebench team,
Thank you for your excellent work!
Issue
I ran into some issues when evaluating the pre-generated samples. I am running the evaluation locally using your pre-built Docker image. When evaluating the ground truths, I saw this "Matplotlib lock error" happen frequently (about 70 times on the "full" split).
0: Traceback (most recent call last):
0: File "/usr/local/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
0: self.run()
0: File "/usr/local/lib/python3.10/multiprocessing/process.py", line 108, in run
0: self._target(*self._args, **self._kwargs)
0: File "/app/bigcodebench/gen/util/__init__.py", line 31, in trusted_exec
0: reliability_guard(max_as_limit, max_data_limit, max_stack_limit)
0: File "/app/bigcodebench/eval/utils.py", line 326, in reliability_guard
0: import matplotlib.pyplot as plt
0: File "/usr/local/lib/python3.10/site-packages/matplotlib/pyplot.py", line 52, in <module>
0: import matplotlib.colorbar
0: File "/usr/local/lib/python3.10/site-packages/matplotlib/colorbar.py", line 19, in <module>
0: from matplotlib import _api, cbook, collections, cm, colors, contour, ticker
0: File "/usr/local/lib/python3.10/site-packages/matplotlib/contour.py", line 13, in <module>
0: from matplotlib.backend_bases import MouseButton
0: File "/usr/local/lib/python3.10/site-packages/matplotlib/backend_bases.py", line 45, in <module>
0: from matplotlib import (
0: File "/usr/local/lib/python3.10/site-packages/matplotlib/text.py", line 16, in <module>
0: from .font_manager import FontProperties
0: File "/usr/local/lib/python3.10/site-packages/matplotlib/font_manager.py", line 1548, in <module>
0: fontManager = _load_fontmanager()
0: File "/usr/local/lib/python3.10/site-packages/matplotlib/font_manager.py", line 1543, in _load_fontmanager
0: json_dump(fm, fm_path)
0: File "/usr/local/lib/python3.10/site-packages/matplotlib/font_manager.py", line 957, in json_dump
0: with cbook._lock_path(filename), open(filename, 'w') as fh:
0: File "/usr/local/lib/python3.10/contextlib.py", line 135, in __enter__
0: return next(self.gen)
0: File "/usr/local/lib/python3.10/site-packages/matplotlib/cbook/__init__.py", line 1809, in _lock_path
0: raise TimeoutError("""\
0: TimeoutError: Lock error: Matplotlib failed to acquire the following lock file:
0: /root/bigcodebench/.cache_date_25-02-09_time_16-34-24/matplotlib/fontlist-v330.json.matplotlib-lock
0: This maybe due to another process holding this lock file. If you are sure no
0: other Matplotlib process is running, remove this file and try again.
This causes the ground-truths pass rate to be low.
0: BigCodeBench-Complete (Full)
0: Groundtruth pass rate: 0.932
0: Please be cautious!
0: Failed tasks: ['BigCodeBench/14', 'BigCodeBench/8', 'BigCodeBench/27', 'BigCodeBench/22', 'BigCodeBench/2', 'BigCodeBench/26', 'BigCodeBench/17', 'BigCodeBench/24', 'BigCodeBench/37', 'BigCodeBench/43', 'BigCodeBench/42', 'BigCodeBench/29', 'BigCodeBench/47', 'BigCodeBench/70', 'BigCodeBench/94', 'BigCodeBench/100', 'BigCodeBench/182', 'BigCodeBench/185', 'BigCodeBench/184', 'BigCodeBench/194', 'BigCodeBench/192', 'BigCodeBench/188', 'BigCodeBench/197', 'BigCodeBench/199', 'BigCodeBench/239', 'BigCodeBench/241', 'BigCodeBench/245', 'BigCodeBench/253', 'BigCodeBench/262', 'BigCodeBench/264', 'BigCodeBench/269', 'BigCodeBench/276', 'BigCodeBench/278', 'BigCodeBench/277', 'BigCodeBench/280', 'BigCodeBench/283', 'BigCodeBench/292', 'BigCodeBench/298', 'BigCodeBench/301', 'BigCodeBench/404', 'BigCodeBench/343', 'BigCodeBench/351', 'BigCodeBench/355', 'BigCodeBench/357', 'BigCodeBench/356', 'BigCodeBench/359', 'BigCodeBench/360', 'BigCodeBench/362', 'BigCodeBench/369', 'BigCodeBench/372', 'BigCodeBench/373', 'BigCodeBench/375', 'BigCodeBench/387', 'BigCodeBench/390', 'BigCodeBench/395', 'BigCodeBench/405', 'BigCodeBench/411', 'BigCodeBench/412', 'BigCodeBench/419', 'BigCodeBench/430', 'BigCodeBench/432', 'BigCodeBench/431', 'BigCodeBench/436', 'BigCodeBench/448', 'BigCodeBench/451', 'BigCodeBench/450', 'BigCodeBench/446', 'BigCodeBench/449', 'BigCodeBench/459', 'BigCodeBench/454', 'BigCodeBench/501', 'BigCodeBench/812', 'BigCodeBench/832', 'BigCodeBench/1005', 'BigCodeBench/205', 'BigCodeBench/289', 'BigCodeBench/363', 'BigCodeBench/417']
Potential fix
After some investigation, I suspect the cause is that the evaluation script runs many programs in parallel, and several of them import matplotlib. On import, matplotlib tries to load (and, if necessary, rebuild) its font cache, so the processes contend for the lock on the same cache file.
To get around this, I was thinking about specifying a unique cache dir for each program that is run. I changed the trusted_exec function in bigcodebench/bigcodebench/gen/util/__init__.py as follows:
import tempfile
def trusted_exec(code, test_code, task_id, max_as_limit, max_data_limit, max_stack_limit, times):
    """Execute trusted code in place.

    The solution ``code`` and its ``test_code`` are concatenated, executed in
    a fresh module named ``__test__``, and the ``TestCases`` class defined by
    that module is run with ``unittest``.

    Args:
        code: Source of the solution to execute.
        test_code: Source of the unit tests; must define ``TestCases``.
        task_id: Identifier of the task (not used inside this function).
        max_as_limit: Forwarded to ``reliability_guard``.
        max_data_limit: Forwarded to ``reliability_guard``.
        max_stack_limit: Forwarded to ``reliability_guard``.
        times: Object whose ``value`` attribute receives the result: the
            elapsed wall-clock seconds when every test passes, or ``-1`` when
            any test fails or errors. Presumably a ``multiprocessing.Value``
            shared with the parent process -- confirm against the caller.
    """
    import builtins
    import shutil

    # Keep references to the real functions up front. The restore step below
    # exists because reliability_guard() is expected to replace them, so the
    # cleanup paths must never depend on the (possibly disabled) module
    # attributes.
    rmtree = shutil.rmtree
    rmdir = os.rmdir
    chdir = os.chdir

    # Give this run private config/cache directories so that concurrently
    # running workers do not fight over the same Matplotlib font-cache lock
    # file. XDG_CONFIG_HOME redirects the per-user config location;
    # MPLCONFIGDIR is what Matplotlib consults first for both its config and
    # its cache directory (where fontlist-*.json lives).
    redirected_vars = ("XDG_CONFIG_HOME", "MPLCONFIGDIR")
    saved_env = {name: os.environ.get(name) for name in redirected_vars}
    temp_xdg = tempfile.mkdtemp(prefix="xdg_config_")
    for name in redirected_vars:
        os.environ[name] = temp_xdg

    try:
        with create_tempdir():
            try:
                module_name = "__test__"
                new_module = types.ModuleType(module_name)

                # Imports matplotlib.pyplot, so the environment overrides
                # above must already be in place at this point.
                reliability_guard(max_as_limit, max_data_limit, max_stack_limit)

                # Set necessary attributes for the module
                new_module.__dict__.update({
                    '__builtins__': builtins,
                    '__file__': f"{module_name}.py",
                    '__package__': None,
                    '__doc__': None,
                    'sys': sys,
                    'os': os,
                    'environ': os.environ,
                })

                # Combine the user code and the test code
                full_code = code + "\n" + test_code

                # Compile and execute the combined code within the new module
                exec(compile(full_code, f"{module_name}.py", 'exec'),
                     new_module.__dict__)
                sys.modules[module_name] = new_module

                TestCases = getattr(new_module, 'TestCases')
                loader = unittest.TestLoader()
                suite = loader.loadTestsFromTestCase(TestCases)
                test_result = unittest.TestResult()

                start = time.time()
                with safe_environment(), swallow_io(), time_limit(seconds=TIMEOUT_LIMIT):
                    suite.run(test_result)

                errors = test_result.failures + test_result.errors
                if errors:
                    print(errors)
                    times.value = -1
                else:
                    times.value = time.time() - start
            finally:
                # Needed for cleaning up. Done in a ``finally`` so that the
                # working-directory cleanup of create_tempdir() also works
                # when the executed code raises.
                shutil.rmtree = rmtree
                os.rmdir = rmdir
                os.chdir = chdir
    finally:
        # Restore the original environment variables
        for name, old_value in saved_env.items():
            if old_value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = old_value
        # Use the reference captured before the guard ran; ``shutil.rmtree``
        # itself may have been disabled if the body above failed early.
        rmtree(temp_xdg, ignore_errors=True)
This suppresses most of the Matplotlib lock errors and increases the ground-truth pass rate:
0: BigCodeBench-Complete (Full)
0: Groundtruth pass rate: 0.991
0: Failed tasks: ['BigCodeBench/39', 'BigCodeBench/16', 'BigCodeBench/1', 'BigCodeBench/832', 'BigCodeBench/812', 'BigCodeBench/1005', 'BigCodeBench/205', 'BigCodeBench/289', 'BigCodeBench/363', 'BigCodeBench/417']
Ask for Comments
I am not sure whether the issue I've seen is indeed due to the reason I was thinking about. And if that was the case, I am not sure whether my fix is valid. So, I wish to see whether you have any feedback on this. Thanks a lot!