 from tvm.contrib import graph_runtime
 from tvm import relay
 from tvm.relay import testing
+from tvm.relay import vm
+from tvm.relay import vmobj as _obj


 def benchmark_execution(mod,
                         params,
-                        measure=False,
+                        measure=True,
                         data_shape=(1, 3, 224, 224),
                         out_shape=(1, 1000),
-                        dtype='float32'):
-    def get_tvm_output(mod, data, params, target, ctx, dtype='float32'):
-        with relay.build_config(opt_level=1):
+                        dtype='float32',
+                        model="unknown"):
+    def get_graph_runtime_output(mod, data, params, target, ctx,
+                                 dtype='float32', number=2, repeat=20):
+        with relay.build_config(opt_level=3):
             graph, lib, params = relay.build(mod, target, params=params)

         m = graph_runtime.create(graph, lib, ctx)
@@ -41,60 +45,81 @@ def get_tvm_output(mod, data, params, target, ctx, dtype='float32'):
         out = m.get_output(0, tvm.nd.empty(out_shape, dtype))

         if measure:
-            print("Evaluate graph runtime inference time cost...")
+            print("Evaluate graph runtime inference cost of {} on "
+                  "{}".format(model, repr(ctx)))
             ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=20)
             # Measure in millisecond.
             prof_res = np.array(ftimer().results) * 1000
-            print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
+            print("Mean graph runtime inference time (std dev): %.2f ms (%.2f ms)" %
                   (np.mean(prof_res), np.std(prof_res)))

         return out.asnumpy()

-    def get_tvm_vm_output(mod, data, params, target, ctx, dtype='float32'):
-        ex = relay.create_executor('vm', mod=mod, ctx=ctx)
-        result = ex.evaluate()(data, **params)
+    def get_vm_output(mod, data, params, target, ctx, dtype='float32',
+                      number=2, repeat=20):
+        with relay.build_config(opt_level=3):
+            exe = vm.compile(mod, target, params=params)
+            rly_vm = vm.VirtualMachine(exe)
+            rly_vm.init(ctx)
+            result = rly_vm.run(data)
+
+        if measure:
+            print("Evaluate vm inference cost of {} on {}".format(model,
+                                                                  repr(ctx)))
+            ftimer = rly_vm.mod.time_evaluator("invoke", ctx, number=number,
+                                               repeat=repeat)
+            # Measure in millisecond.
+            prof_res = np.array(ftimer("main", _obj.Tensor(data)).results) * 1000
+            print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
+                  (np.mean(prof_res), np.std(prof_res)))
+
         return result.asnumpy().astype(dtype)

     # random input
     data = np.random.uniform(size=data_shape).astype(dtype)
     target = "llvm"
     ctx = tvm.cpu(0)

-    tvm_out = get_tvm_output(mod, tvm.nd.array(data.astype(dtype)), params,
-                             target, ctx, dtype)
-    vm_out = get_tvm_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
-                               target, ctx, dtype)
+    tvm_out = get_graph_runtime_output(mod, tvm.nd.array(data.astype(dtype)),
+                                       params, target, ctx, dtype)
+    vm_out = get_vm_output(mod, tvm.nd.array(data.astype(dtype)), params,
+                           target, ctx, dtype)
     tvm.testing.assert_allclose(vm_out, tvm_out, rtol=1e-5, atol=1e-5)


 def test_mlp():
     image_shape = (1, 1, 28, 28)
     mod, params = testing.mlp.get_workload(1)
-    benchmark_execution(mod, params, data_shape=image_shape, out_shape=(1, 10))
+    benchmark_execution(mod, params, data_shape=image_shape, out_shape=(1, 10),
+                        model="mlp")


 def test_vgg():
     for n in [11, 16]:
         mod, params = testing.vgg.get_workload(1, num_layers=n)
-        benchmark_execution(mod, params)
+        model = "vgg" + str(n)
+        benchmark_execution(mod, params, model=model)


 def test_resnet():
     for n in [18, 50]:
         mod, params = testing.resnet.get_workload(batch_size=1, num_layers=n)
-        benchmark_execution(mod, params, True)
+        model = "resnet" + str(n)
+        benchmark_execution(mod, params, model=model)


 def test_squeezenet():
     for version in ['1.0', '1.1']:
         mod, params = testing.squeezenet.get_workload(version=version)
-        benchmark_execution(mod, params)
+        model = "squeezenet" + version
+        benchmark_execution(mod, params, model=model)


 def test_inception_v3():
     image_shape = (3, 299, 299)
     mod, params = testing.inception_v3.get_workload(image_shape=image_shape)
-    benchmark_execution(mod, params, data_shape=(1, 3, 299, 299))
+    benchmark_execution(mod, params, data_shape=(1, 3, 299, 299),
+                        model="inception_v3")


 def test_dqn():
@@ -112,7 +137,7 @@ def test_dcgan():

 def test_mobilenet():
     mod, params = testing.mobilenet.get_workload(batch_size=1)
-    benchmark_execution(mod, params)
+    benchmark_execution(mod, params, model="mobilenet")

 # TODO: enable when the low building performance (several minutes) fixed.
 def test_mobilenet_nhwc():
@@ -124,7 +149,7 @@ def test_mobilenet_nhwc():

 def test_densenet():
     mod, params = testing.densenet.get_workload(batch_size=1)
-    benchmark_execution(mod, params)
+    benchmark_execution(mod, params, model="densenet")


 if __name__ == '__main__':
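
A minimal usage sketch, assuming benchmark_execution from the diffed file above is in scope and that the Relay VM is enabled in the TVM build: it times a single workload directly rather than going through the test functions, reusing the resnet-18 case from test_resnet().

    # Hedged sketch: assumes benchmark_execution from the diffed file is in
    # scope and the Relay VM runtime is available in this TVM build.
    from tvm.relay import testing

    # ResNet-18 workload with batch size 1, as in test_resnet() above.
    mod, params = testing.resnet.get_workload(batch_size=1, num_layers=18)

    # measure now defaults to True, so this prints mean/std inference time
    # for both the graph runtime and the VM, labeled with the model name.
    benchmark_execution(mod, params, model="resnet18")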