@@ -16,6 +16,7 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include "gflags/gflags.h"
 
+#include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/inference/tests/api/trt_test_helper.h"
 
 namespace paddle {
@@ -143,5 +144,136 @@ TEST(AnalysisPredictor, fp16) {
 #endif
 }
 
+// ernie_varlen
+std::shared_ptr<paddle_infer::Predictor> InitPredictor() {
+  paddle_infer::Config config;
+  config.SetModel(FLAGS_infer_model);
+
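+  // Use the GPU with a 100 MB initial memory pool on device 0.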
+  config.EnableUseGpu(100, 0);
+
+  // Open the memory optim.
+  config.EnableMemoryOptim();
+
+  int max_batch = 32;
+  int max_single_seq_len = 128;
+  int opt_single_seq_len = 64;
+  int min_batch_seq_len = 1;
+  int max_batch_seq_len = 512;
+  int opt_batch_seq_len = 256;
+
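+  // Inputs of the varlen ernie model: tmp_0 holds the packed token ids,
+  // tmp_1 the matching sentence ids, tmp_2 the cumulative sequence offsets
+  // (batch + 1 entries), and tmp_4 a placeholder whose shape carries the
+  // max sequence length (see run() below).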
+  std::string input_name0 = "read_file_0.tmp_0";
+  std::string input_name1 = "read_file_0.tmp_1";
+  std::string input_name2 = "read_file_0.tmp_2";
+  std::string input_name3 = "read_file_0.tmp_4";
+
+  std::vector<int> min_shape = {min_batch_seq_len};
+  std::vector<int> max_shape = {max_batch_seq_len};
+  std::vector<int> opt_shape = {opt_batch_seq_len};
+  // Set the input's min, max, opt shape
+  std::map<std::string, std::vector<int>> min_input_shape = {
+      {input_name0, min_shape},
+      {input_name1, min_shape},
+      {input_name2, {1}},
+      {input_name3, {1, 1, 1}}};
+  std::map<std::string, std::vector<int>> max_input_shape = {
+      {input_name0, max_shape},
+      {input_name1, max_shape},
+      {input_name2, {max_batch + 1}},
+      {input_name3, {1, max_single_seq_len, 1}}};
+  std::map<std::string, std::vector<int>> opt_input_shape = {
+      {input_name0, opt_shape},
+      {input_name1, opt_shape},
+      {input_name2, {max_batch + 1}},
+      {input_name3, {1, opt_single_seq_len, 1}}};
+
+  // only kHalf supported
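+  // Args: 1 GB workspace, max_batch_size 1, min_subgraph_size 5, FP16
+  // precision, use_static = false, use_calib_mode = false.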
+  config.EnableTensorRtEngine(
+      1 << 30, 1, 5, paddle_infer::Config::Precision::kHalf, false, false);
+  // ernie varlen must be used with dynamic shape
+  config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
+                                opt_input_shape);
+  // ernie varlen must be used with oss
+  config.EnableTensorRtOSS();
+
+  return paddle_infer::CreatePredictor(config);
+}
+
+void run(paddle_infer::Predictor* predictor, std::vector<float>* out_data) {
+  const int run_batch = 2;
+  const int run_seq_len = 71;
+  const int max_seq_len = 128;
+
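+  // Token ids of both sentences, packed back to back without padding:
+  // 40 tokens for sentence 1 plus 31 for sentence 2 = run_seq_len.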
+  int32_t i1[run_seq_len] = {
+      // sentence 1
+      1, 3558, 4, 75, 491, 89, 340, 313, 93, 4, 255, 10, 75, 321, 4095, 1902, 4,
+      134, 49, 75, 311, 14, 44, 178, 543, 15, 12043, 2, 75, 201, 340, 9, 14, 44,
+      486, 218, 1140, 279, 12043, 2,
+      // sentence 2
+      101, 2054, 2234, 2046, 2486, 2044, 1996, 2047, 4552, 2001, 9536, 1029,
+      102, 2004, 1997, 2008, 2154, 1010, 1996, 2047, 4552, 9536, 2075, 1996,
+      2117, 3072, 2234, 2046, 2486, 1012, 102,
+  };
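+  // Sentence (segment) ids, one per token in the same packed layout:
+  // 0 marks first-segment tokens, 1 marks second-segment tokens.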
+  int32_t i2[run_seq_len] = {
+      // sentence 1
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      // sentence 2
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1};
+  // Cumulative sequence offsets of this batch: sentence 1 occupies tokens
+  // [0, 40) and sentence 2 occupies [40, 71).
+  int32_t i3[3] = {0, 40, 71};
+  // max_seq_len represents the max sentence length of all the sentences; only
+  // the length of input i4 is used, its data means nothing.
+  int32_t i4[max_seq_len] = {0};
+
+  auto input_names = predictor->GetInputNames();
+  // first input
+  auto input_t1 = predictor->GetInputHandle(input_names[0]);
+  input_t1->Reshape({run_seq_len});
+  input_t1->CopyFromCpu(i1);
+
+  // second input
+  auto input_t2 = predictor->GetInputHandle(input_names[1]);
+  input_t2->Reshape({run_seq_len});
+  input_t2->CopyFromCpu(i2);
+
+  // third input
+  auto input_t3 = predictor->GetInputHandle(input_names[2]);
+  input_t3->Reshape({run_batch + 1});
+  input_t3->CopyFromCpu(i3);
+
+  // fourth input
+  auto input_t4 = predictor->GetInputHandle(input_names[3]);
+  input_t4->Reshape({1, max_seq_len, 1});
+  input_t4->CopyFromCpu(i4);
+
+  CHECK(predictor->Run());
+
+  auto output_names = predictor->GetOutputNames();
+  auto output_t = predictor->GetOutputHandle(output_names[0]);
+  std::vector<int> output_shape = output_t->shape();
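+  // The element count of the output is the product of its shape dims.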
+  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                std::multiplies<int>());
+  out_data->resize(out_num);
+  output_t->CopyToCpu(out_data->data());
+}
+
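+// The varlen path needs the TensorRT OSS plugins; IS_TRT_VERSION_GE comes
+// from tensorrt/helper.h, and 7234 encodes version 7.2.3.4.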
+TEST(AnalysisPredictor, ernie_varlen) {
+#if IS_TRT_VERSION_GE(7234)
+  auto predictor = InitPredictor();
+  std::vector<float> out_data;
+  run(predictor.get(), &out_data);
+  std::vector<float> ref_data{0.59814,  0.219882, 0.181978,
+                              0.359796, 0.577414, 0.0627908};
+  float near_tolerance = 1e-3;
+  for (size_t i = 0; i < out_data.size(); i++) {
+    EXPECT_NEAR(ref_data[i], out_data[i], near_tolerance);
+  }
+#endif
+}
+
 }  // namespace inference
 }  // namespace paddle