@@ -2,19 +2,21 @@
 Helpers for Evaluations
 """
 
-import requests
-import torch
-import torch.nn as nn
+import importlib
+import json
 import os, subprocess
-from pydantic import BaseModel
-import numpy as np
 import random
-import json
-from contextlib import redirect_stdout, redirect_stderr
-from io import StringIO
 import sys
-import importlib
 import tempfile
+from contextlib import redirect_stderr, redirect_stdout
+from io import StringIO
+
+import numpy as np
+import requests
+import torch
+import torch.nn as nn
+from pydantic import BaseModel
+
 from . import utils
 
 REPO_TOP_PATH = os.path.abspath(
@@ -25,14 +27,15 @@
 )
 KERNEL_BENCH_PATH = os.path.join(REPO_TOP_PATH, "KernelBench")
 
+
 def import_ModelNew_from_code(code_string):
     """
     Writes the provided Python code string to a temporary .py file,
     dynamically imports the module so we can access 'ModelNew',
 
     This is a hack in order to allow decorators (useful for triton code) in the custom kernel code
     Unfortunately, this means that we cannot delete the tempfile until the model itself is deleted,
-    so we need to do a bit of garbage collection ourselves (callers responsibility) and delete the tempfile
+    so we need to do a bit of garbage collection ourselves (caller's responsibility) and delete the tempfile
     when the model is deleted / before the program exits
     The name of the tempfile is returned so we can delete it later.
     """
@@ -179,7 +182,9 @@ def _cleanup_cuda_extensions():
         shutil.rmtree(torch_extensions_path)
 
 
-def graceful_eval_cleanup(curr_context: dict, device: torch.device, tempfile_path: str = None):
+def graceful_eval_cleanup(
+    curr_context: dict, device: torch.device, tempfile_path: str = None
+):
     """
     Clean up env, gpu cache, and compiled CUDA extensions after evaluation
     """  # delete ran-specific function definitions before next eval run
@@ -200,6 +205,7 @@ def graceful_eval_cleanup(curr_context: dict, device: torch.device, tempfile_pat
     if tempfile_path:
         os.remove(tempfile_path)
 
+
 def build_compile_cache_legacy(
     custom_model_src: str,
     verbose: bool = False,
@@ -233,11 +239,12 @@ def build_compile_cache_legacy(
         if verbose:
             print(f"[Compilation] Compilation Successful, saved cache at: {build_dir}")
     except Exception as e:
-        print(f"[Compilation] Failed to compile custom CUDA kernel. Unable to cache, \n Error: {e}")
+        print(
+            f"[Compilation] Failed to compile custom CUDA kernel. Unable to cache, \n Error: {e}"
+        )
         return False, stdout_buffer.getvalue(), str(e)
-
-    return True, stdout_buffer.getvalue(), None
 
+    return True, stdout_buffer.getvalue(), None
 
 
 def build_compile_cache(
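
Both compile-cache helpers return `stdout_buffer.getvalue()`; the capture idiom behind that, supported by the newly grouped imports (`redirect_stdout`, `redirect_stderr`, `StringIO`), is roughly the following sketch with the compile step stubbed out:

```python
from contextlib import redirect_stderr, redirect_stdout
from io import StringIO


def compile_and_capture_sketch(compile_fn, *args, **kwargs):
    # Route anything the compile step prints (nvcc/ninja chatter, warnings)
    # into an in-memory buffer so it can be returned to the caller.
    stdout_buffer = StringIO()
    try:
        with redirect_stdout(stdout_buffer), redirect_stderr(stdout_buffer):
            compile_fn(*args, **kwargs)
    except Exception as e:
        return False, stdout_buffer.getvalue(), str(e)
    return True, stdout_buffer.getvalue(), None
```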
@@ -273,16 +280,16 @@ def build_compile_cache(
         if verbose:
             print(f"[Compilation] Compilation Successful, saved cache at: {build_dir}")
     except Exception as e:
-        print(f"[Compilation] Failed to compile custom CUDA kernel. Unable to cache, \n Error: {e}")
+        print(
+            f"[Compilation] Failed to compile custom CUDA kernel. Unable to cache, \n Error: {e}"
+        )
         return False, stdout_buffer.getvalue(), str(e)
 
     return True, stdout_buffer.getvalue(), None
 
 
 def build_compile_cache_with_capturing(
-    custom_model_src: str,
-    verbose: bool = False,
-    build_dir: os.PathLike = None
+    custom_model_src: str, verbose: bool = False, build_dir: os.PathLike = None
 ) -> tuple[int, str, str]:
     """
     Write a temporary python file to compile the custom model on CPU
@@ -304,22 +311,21 @@ def build_compile_cache_with_capturing(
         f.write(custom_model_src)
 
     # Execute the temporary Python file and capture output
-    process = subprocess.Popen(['python', tmp], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    process = subprocess.Popen(
+        ["python", tmp], stdout=subprocess.PIPE, stderr=subprocess.PIPE
+    )
     stdout, stderr = process.communicate()
     returncode = process.returncode
 
     # Clean up temporary file
     os.remove(tmp)
 
-
     if verbose:
         print("[CPU Precompile] return code: ", returncode)
-        print("[CPU Precompile] stdout: \n", stdout.decode('utf-8'))
-        print("[CPU Precompile] stderr: \n", stderr.decode('utf-8'))
-
-    return returncode, stdout.decode('utf-8'), stderr.decode('utf-8')
-
+        print("[CPU Precompile] stdout: \n", stdout.decode("utf-8"))
+        print("[CPU Precompile] stderr: \n", stderr.decode("utf-8"))
 
+    return returncode, stdout.decode("utf-8"), stderr.decode("utf-8")
 
 
 def eval_kernel_against_ref(
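
The `Popen` + `communicate()` pair above could also be expressed with `subprocess.run`; a small equivalent sketch (assuming `tmp_path` points at the generated script):

```python
import subprocess


def precompile_in_subprocess_sketch(tmp_path: str) -> tuple[int, str, str]:
    # Run the generated script in a fresh interpreter and capture its output as text
    result = subprocess.run(["python", tmp_path], capture_output=True, text=True)
    return result.returncode, result.stdout, result.stderr
```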
@@ -331,7 +337,9 @@ def eval_kernel_against_ref(
     verbose: bool = False,
     measure_performance: bool = False,
     build_dir: os.PathLike = None,
-    device: torch.device = torch.cuda.current_device() if torch.cuda.is_available() else None,  # have to run on GPU
+    device: torch.device = (
+        torch.cuda.current_device() if torch.cuda.is_available() else None
+    ),  # have to run on GPU
 ) -> KernelExecResult:
     """
     Evaluate the custom kernel against the original model
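
One caveat about the default shown above (unchanged by this commit): a Python default argument is evaluated once, at function definition time, so `torch.cuda.current_device()` is resolved when the module is imported. A common alternative, sketched below, defaults to `None` and resolves the device at call time:

```python
def eval_sketch(device: torch.device = None):
    # Pick the device when the function is actually called, not at import time
    if device is None and torch.cuda.is_available():
        device = torch.cuda.current_device()
    return device
```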
@@ -382,9 +390,12 @@ def eval_kernel_against_ref(
 
     # this is where compilation happens
     try:
+        tempfile_path = None  # in case load_custom_model fails
         os.environ["TORCH_USE_CUDA_DSA"] = "1"  # compile with device side assertion
         # add hash for later to distinguish between multi-turn kernels
-        ModelNew, tempfile_path = load_custom_model(custom_model_src, context, build_dir)
+        ModelNew, tempfile_path = load_custom_model(
+            custom_model_src, context, build_dir
+        )
         torch.cuda.synchronize(device=device)  # not sure if this is too much
     except Exception as e:
         print(
@@ -398,7 +409,7 @@ def eval_kernel_against_ref(
             print(
                 f"[Eval] Lock file error during compilation, Please retry. Error: {e}"
             )
-            graceful_eval_cleanup(context, device, tempfile_path)
+            graceful_eval_cleanup(context, device)
             return None
         else:
             metadata["compilation_error"] = e
@@ -709,11 +720,13 @@ def check_metadata_serializable(metadata: dict):
 
     return metadata
 
+
 def check_metadata_serializable_all_types(metadata: dict):
     """
     Ensure metadata is JSON serializable,
     if not, convert non-serializable values to strings recursively
     """
+
     def convert_to_serializable(obj):
         if isinstance(obj, dict):
             return {k: convert_to_serializable(v) for k, v in obj.items()}
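
The hunk ends after the dict branch of `convert_to_serializable`; the recursive idea the docstring states (walk the structure, stringify anything JSON cannot encode) might be completed roughly as follows — a sketch, not necessarily the exact branches used in the repository:

```python
import json


def convert_to_serializable_sketch(obj):
    if isinstance(obj, dict):
        return {k: convert_to_serializable_sketch(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable_sketch(v) for v in obj]
    try:
        json.dumps(obj)  # already serializable (str, int, float, bool, None)
        return obj
    except (TypeError, ValueError):
        return str(obj)  # e.g. exceptions, tensors, devices -> readable string
```

Used as `metadata = convert_to_serializable_sketch(metadata)` before handing the dict to `json.dumps`.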