3
3
import os
4
4
import socket
5
5
import time
6
- import traceback
7
6
from typing import Dict , List , Optional , Union
8
7
9
8
import requests
10
9
from requests .adapters import HTTPAdapter
10
+ from requests .exceptions import ConnectionError
11
11
from urllib3 .connection import HTTPConnection
12
12
13
13
try :
@@ -104,7 +104,7 @@ def __init__(
104
104
def generate (
105
105
self ,
106
106
inputs : Union [List [str ], PromptList ],
107
- max_out_len : int = 4096 ,
107
+ max_out_len : int = 11264 ,
108
108
) -> List [str ]:
109
109
"""Generate results given a list of inputs.
110
110
@@ -128,7 +128,7 @@ def generate(
128
128
): i
129
129
for i , input in enumerate (inputs )
130
130
}
131
- results = ['' ] * len (inputs )
131
+ results = ['' ] * len (inputs ) # set empty str in advance
132
132
for future in concurrent .futures .as_completed (future_to_m ):
133
133
m = future_to_m [future ] # noqa F841
134
134
resp = future .result ()
@@ -198,34 +198,29 @@ def _generate(
198
198
'messages' :
199
199
messages ,
200
200
'max_tokens' :
201
- max (
202
- max_out_len if max_out_len else 4096 ,
203
- self .max_seq_len if self .max_seq_len else 4096 ,
204
- ),
201
+ 11264 ,
205
202
}
206
203
request .update (self .generation_kwargs )
207
- try :
208
- retry_num = 0
209
- while retry_num < self .retry :
204
+
205
+ retry_num = 0
206
+ while retry_num < self .retry :
207
+ try :
210
208
response = self ._infer_result (request , sess )
211
- if response .status_code == 200 :
212
- break # success
213
- elif response .status_code == 426 :
214
- retry_num += 1 # retry
215
- elif response .status_code in [302 , 429 , 500 , 504 ]:
216
- time .sleep (BAILING_RETRY_DELAY )
217
- retry_num += 1 # retry
218
- else :
219
- raise ValueError (f'Status code = { response .status_code } ' )
209
+ except ConnectionError :
210
+ time .sleep (BAILING_RETRY_DELAY )
211
+ retry_num += 1 # retry
212
+ if response .status_code == 200 :
213
+ break # success
214
+ elif response .status_code == 426 :
215
+ retry_num += 1 # retry
216
+ elif response .status_code in [302 , 429 , 500 , 504 ]:
217
+ time .sleep (BAILING_RETRY_DELAY )
218
+ retry_num += 1 # retry
220
219
else :
221
- raise ValueError (
222
- f'Exceed the maximal retry times. Last status code '
223
- f'= { response .status_code } ' )
224
- except Exception as e :
225
- self .logger .error (f'Fail to inference request={ request } ; '
226
- f'model_name={ self .path } ; error={ e } , '
227
- f'stack:{ traceback .format_exc ()} ' )
228
- raise e
220
+ raise ValueError (f'Status code = { response .status_code } ' )
221
+ else :
222
+ # Exceed the maximal retry times, return empty str
223
+ return ''
229
224
return response
230
225
231
226
# @retry(stop_max_attempt_number=3, wait_fixed=16000) # ms
0 commit comments