@@ -24,7 +24,7 @@ def helper(self, model, input_tensor, patterns, patch_info, param_counts):
2424 mp .patch_model (model )
2525 out = model (input_tensor )
2626
27- def test1 (self ):
27+ def test0 (self ):
2828 density = 0.5
2929 for bias in [False , True ]:
3030 for patch_info in [{"density" :0.5 }, {"density" :density , "pseudo_linear" :True }]:
@@ -44,35 +44,56 @@ def test1(self):
4444
4545 self .helper (model , input_tensor , ["0" ], patch_info = patch_info , param_counts = [pc , pc_sparse ])
4646
47+ def roberta_build (self , sparse = False , base_model = None , density = 1.0 , eval = True ):
48+ if base_model == None :
49+ config = RobertaConfig (
50+ vocab_size = 52_000 ,
51+ max_position_embeddings = 514 ,
52+ num_attention_heads = 12 ,
53+ num_hidden_layers = 6 ,
54+ type_vocab_size = 1 ,
55+ )
4756
48- def test0 (self ):
49- config = RobertaConfig (
50- vocab_size = 52_000 ,
51- max_position_embeddings = 514 ,
52- num_attention_heads = 12 ,
53- num_hidden_layers = 6 ,
54- type_vocab_size = 1 ,
55- )
57+ model = RobertaForMaskedLM (config = config ).cuda ()
58+ else :
59+ model = base_model
5660
57- model = RobertaForMaskedLM (config = config ).cuda ()
58- model .eval ()
61+ if sparse :
62+ mp = BlockSparseModelPatcher ()
63+ mp .add_pattern ("roberta\.encoder\.layer\.[0-9]+.intermediate\.dense" , {"density" : density })
64+ mp .add_pattern ("roberta\.encoder\.layer\.[0-9]+.output\.dense" , {"density" : density })
65+ mp .patch_model (model )
5966
60- verbose = False
67+ if eval :
68+ model .eval ()
69+
70+ return model , model .num_parameters ()
71+
72+
73+ def test1 (self ):
74+ model0 , num_parameters0 = self .roberta_build ()
75+
76+ input_ids = torch .tensor ([[4 , 5 , 6 , 7 ] * 8 ]).cuda ()
77+ input_ids = input_ids .expand ((1 , 32 ))
78+
79+ out0 = model0 (input_ids )
80+
81+ model1 , num_parameters1 = self .roberta_build (sparse = True , base_model = model0 )
82+ out1 = model1 (input_ids )
83+
84+ self .assertTrue (torch .isclose (out0 [0 ], out1 [0 ], atol = 1e-3 ).all ())
85+
86+ model2 , num_parameters2 = self .roberta_build (sparse = True , density = 0.5 , eval = True )
87+ model2 .eval ()
88+
89+ out2 = model2 (input_ids )
90+
91+ self .assertEqual (num_parameters0 , num_parameters1 )
92+ self .assertGreater (70000000 , num_parameters2 )
93+
94+ def test_full (self ):
95+ pass
6196
62- for i in range (2 ):
63- # => 70 million parameters instead of 84 million parameters when i = 1
64- print ("model num parameters" , model .num_parameters ())
65-
66- input_ids = torch .tensor ([[4 , 5 , 6 , 7 ]* 8 ]).cuda ()
67- input_ids = input_ids .expand ((1 , 32 ))
68- out = model (input_ids )
69- if verbose :
70- print (out )
71- if i == 0 :
72- mp = BlockSparseModelPatcher ()
73- mp .add_pattern ("roberta\.encoder\.layer\.[0-9]+.intermediate\.dense" , {"density" :0.5 })
74- mp .add_pattern ("roberta\.encoder\.layer\.[0-9]+.output\.dense" , {"density" :0.5 })
75- mp .patch_model (model )
7697
# Allow running this test module directly with `python <file>.py`;
# under import (e.g. by a test runner) nothing executes here.
if __name__ == "__main__":
    unittest.main()
0 commit comments