
Commit bcb707d

[Fix] Fix BailingAPI model (#1707)
* [fix] sequence under the multiple samples
* resolve the lint problems
* change the parameter name
* add another error code for retry
* output the log for invalid response
* format correction
* update
* update
* update
* update
* add two model python files
* update the default parameter
* use random for delay
* update the api example of bailing
* remove the unnecessary parameter
1 parent ef695e2 commit bcb707d

6 files changed (+60, -62)


configs/api_examples/eval_api_bailing.py (+10, -4)
@@ -15,13 +15,19 @@
 
 models = [
     dict(
-        path='Bailing-Lite-0830',
+        path='Bailing-Lite-1116',
         token='xxxxxx',  # set your key here or in environment variable BAILING_API_KEY
         url='https://bailingchat.alipay.com/chat/completions',
         type=BailingAPI,
-        generation_kwargs={},
-        query_per_second=1,
-        max_seq_len=4096,
+        max_out_len=11264,
+        batch_size=1,
+        generation_kwargs={
+            'temperature': 0.01,
+            'top_p': 1.0,
+            'top_k': -1,
+            'n': 1,
+            'logprobs': 1,
+        },
     ),
 ]

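Note: the example config swaps the old query_per_second/max_seq_len pair for max_out_len and batch_size, and pins near-greedy sampling (temperature 0.01). The token comment above describes a fallback to the BAILING_API_KEY environment variable; a minimal sketch of that lookup (hypothetical helper, the real check lives inside BailingAPI):

import os

# Hypothetical helper mirroring the config comment: an explicit `token`
# wins, otherwise BAILING_API_KEY is read from the environment.
def resolve_token(token: str = '') -> str:
    token = token or os.environ.get('BAILING_API_KEY', '')
    if not token:
        raise ValueError('Set `token` in the config or export BAILING_API_KEY')
    return token

# e.g. resolve_token('') raises unless BAILING_API_KEY is exported.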
configs/models/bailing_api/bailing-pro-0920.py → configs/models/bailing_api/bailing-lite-1116.py (+3, -5)
@@ -10,21 +10,19 @@
 
 models = [
     dict(
-        path='Bailing-Pro-0920',
+        path='Bailing-Lite-1116',
         token='',  # set your key here or in environment variable BAILING_API_KEY
         url='https://bailingchat.alipay.com/chat/completions',
         type=BailingAPI,
         meta_template=api_meta_template,
-        query_per_second=1,
-        max_seq_len=4096,
+        max_out_len=11264,
         batch_size=1,
         generation_kwargs={
-            'temperature': 0.4,
+            'temperature': 0.01,
             'top_p': 1.0,
             'top_k': -1,
             'n': 1,
             'logprobs': 1,
-            'use_beam_search': False,
         },
     ),
 ]

opencompass/configs/models/bailing_api/bailing-pro-0920.py → configs/models/bailing_api/bailing-pro-1120.py (+3, -5)
@@ -10,21 +10,19 @@
 
 models = [
     dict(
-        path='Bailing-Pro-0920',
+        path='Bailing-Pro-1120',
         token='',  # set your key here or in environment variable BAILING_API_KEY
         url='https://bailingchat.alipay.com/chat/completions',
         type=BailingAPI,
         meta_template=api_meta_template,
-        query_per_second=1,
-        max_seq_len=4096,
+        max_out_len=11264,
         batch_size=1,
         generation_kwargs={
-            'temperature': 0.4,
+            'temperature': 0.01,
             'top_p': 1.0,
             'top_k': -1,
             'n': 1,
             'logprobs': 1,
-            'use_beam_search': False,
         },
     ),
 ]

configs/models/bailing_api/bailing-lite-0830.py → opencompass/configs/models/bailing_api/bailing-lite-1116.py (+3, -5)
@@ -10,21 +10,19 @@
 
 models = [
     dict(
-        path='Bailing-Lite-0830',
+        path='Bailing-Lite-1116',
         token='',  # set your key here or in environment variable BAILING_API_KEY
         url='https://bailingchat.alipay.com/chat/completions',
         type=BailingAPI,
         meta_template=api_meta_template,
-        query_per_second=1,
-        max_seq_len=4096,
+        max_out_len=11264,
         batch_size=1,
         generation_kwargs={
-            'temperature': 0.4,
+            'temperature': 0.01,
             'top_p': 1.0,
             'top_k': -1,
             'n': 1,
             'logprobs': 1,
-            'use_beam_search': False,
         },
     ),
 ]

opencompass/configs/models/bailing_api/bailing-lite-0830.py → opencompass/configs/models/bailing_api/bailing-pro-1120.py (+3, -5)
@@ -10,21 +10,19 @@
 
 models = [
     dict(
-        path='Bailing-Lite-0830',
+        path='Bailing-Pro-1120',
         token='',  # set your key here or in environment variable BAILING_API_KEY
         url='https://bailingchat.alipay.com/chat/completions',
         type=BailingAPI,
         meta_template=api_meta_template,
-        query_per_second=1,
-        max_seq_len=4096,
+        max_out_len=11264,
         batch_size=1,
         generation_kwargs={
-            'temperature': 0.4,
+            'temperature': 0.01,
             'top_p': 1.0,
             'top_k': -1,
             'n': 1,
             'logprobs': 1,
-            'use_beam_search': False,
         },
     ),
 ]

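Note: all four renamed model configs get the same treatment: query_per_second and max_seq_len give way to max_out_len=11264, temperature drops from 0.4 to 0.01, and the unnecessary use_beam_search parameter is removed. For orientation, a hedged sketch of the HTTP call these configs ultimately drive; the payload keys ('model', 'messages', 'max_tokens' plus the sampling kwargs) mirror what _generate() builds in the client diff below, while the Bearer authorization scheme is an assumption, not taken from this commit:

import os

import requests

# Hypothetical standalone request mirroring one of the configs above.
# Auth header format is assumed; BAILING_API_KEY must be exported.
resp = requests.post(
    'https://bailingchat.alipay.com/chat/completions',
    headers={'Authorization': f"Bearer {os.environ['BAILING_API_KEY']}"},
    json={
        'model': 'Bailing-Pro-1120',
        'messages': [{'role': 'user', 'content': 'Hello'}],
        'max_tokens': 11264,
        'temperature': 0.01,
        'top_p': 1.0,
        'top_k': -1,
        'n': 1,
        'logprobs': 1,
    },
    timeout=60,
)
print(resp.status_code)
print(resp.json() if resp.ok else resp.text)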
opencompass/models/bailing_api_oc.py (+38, -38)
@@ -1,13 +1,14 @@
 import concurrent
 import concurrent.futures
 import os
+import random
 import socket
 import time
-import traceback
 from typing import Dict, List, Optional, Union
 
 import requests
 from requests.adapters import HTTPAdapter
+from requests.exceptions import ConnectionError
 from urllib3.connection import HTTPConnection
 
 try:
@@ -21,8 +22,6 @@
 
 PromptType = Union[PromptList, str]
 
-BAILING_RETRY_DELAY: int = 30
-
 
 class HTTPAdapterWithSocketOptions(HTTPAdapter):
 
@@ -104,7 +103,7 @@ def __init__(
     def generate(
         self,
         inputs: Union[List[str], PromptList],
-        max_out_len: int = 4096,
+        max_out_len: int = 11264,
     ) -> List[str]:
         """Generate results given a list of inputs.
 
@@ -128,24 +127,33 @@ def generate(
                 ): i
                 for i, input in enumerate(inputs)
             }
-            results = []
+            results = [''] * len(inputs)
             for future in concurrent.futures.as_completed(future_to_m):
                 m = future_to_m[future]  # noqa F841
                 resp = future.result()
                 if resp and resp.status_code == 200:
                     try:
                         result = resp.json()
                     except Exception as e:  # noqa F841
-                        results.append('')
+                        self.logger.error(f'Fail to inference; '
+                                          f'model_name={self.path}; '
+                                          f'error={e}, '
+                                          f'request={inputs[m]}')
                     else:
                         if (result.get('choices')
                                 and result['choices'][0].get('message') and
                                 result['choices'][0]['message'].get('content')
                                 is not None):
-                            results.append(
-                                result['choices'][0]['message']['content'])
+                            results[m] = \
+                                result['choices'][0]['message']['content']
+                        else:
+                            self.logger.error(f'Receive invalid result. '
+                                              f'result={result}; '
+                                              f'request={inputs[m]}')
                 else:
-                    results.append('')
+                    self.logger.error(f'Receive invalid response. '
+                                      f'response={resp}; '
+                                      f'request={inputs[m]}')
         self.flush()
         return results

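Note: the generate() change above is the ordering fix ("sequence under the multiple samples" in the commit message). Results used to be appended in future-completion order, which could misalign outputs with inputs when requests finish out of order; the list is now pre-allocated and written by input index, and failures log the offending request instead of silently appending ''. The ordering pattern in isolation (a minimal generic sketch, not the OpenCompass API):

import concurrent.futures

def run_ordered(fn, inputs, max_workers=4):
    """Run fn over inputs concurrently while preserving input order."""
    # Map each future back to the index of the input that produced it,
    # because as_completed() yields futures in completion order.
    results = [''] * len(inputs)
    with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
        future_to_idx = {
            executor.submit(fn, x): i for i, x in enumerate(inputs)
        }
        for future in concurrent.futures.as_completed(future_to_idx):
            results[future_to_idx[future]] = future.result()
    return results

# run_ordered(str.upper, ['a', 'b', 'c']) == ['A', 'B', 'C'], regardless
# of which call finishes first.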
@@ -184,39 +192,31 @@ def _generate(
             message['role'] = item['role']
             messages.append(message)
         request = {
-            'model':
-            self._model,
-            'messages':
-            messages,
-            'max_seq_len':
-            max(
-                max_out_len if max_out_len else 4096,
-                self.max_seq_len if self.max_seq_len else 4096,
-            ),
+            'model': self._model,
+            'messages': messages,
+            'max_tokens': max_out_len,
         }
         request.update(self.generation_kwargs)
-        try:
-            retry_num = 0
-            while retry_num < self.retry:
+        retry_num = 0
+        while retry_num < self.retry:
+            try:
                 response = self._infer_result(request, sess)
-                if response.status_code == 200:
-                    break  # success
-                elif response.status_code == 426:
-                    retry_num += 1  # retry
-                elif response.status_code in [429, 500, 504]:
-                    time.sleep(BAILING_RETRY_DELAY)
-                    retry_num += 1  # retry
-                else:
-                    raise ValueError(f'Status code = {response.status_code}')
+            except ConnectionError:
+                time.sleep(random.randint(10, 30))
+                retry_num += 1  # retry
+                continue
+            if response.status_code == 200:
+                break  # success
+            elif response.status_code == 426:
+                retry_num += 1  # retry
+            elif response.status_code in [302, 429, 500, 504]:
+                time.sleep(random.randint(10, 30))
+                retry_num += 1  # retry
             else:
-                raise ValueError(
-                    f'Exceed the maximal retry times. Last status code '
-                    f'= {response.status_code}')
-        except Exception as e:
-            self.logger.error(f'Fail to inference request={request}; '
-                              f'model_name={self.path}; error={e}, '
-                              f'stack:{traceback.format_exc()}')
-            raise e
+                raise ValueError(f'Status code = {response.status_code}')
+        else:
+            # Exceed the maximal retry times.
+            return ''
         return response
 
         # @retry(stop_max_attempt_number=3, wait_fixed=16000)  # ms

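Note: _generate() changes behavior in three ways. The try block now wraps only the request itself, so a ConnectionError costs a single attempt (with a randomized 10-30 s delay) rather than aborting the whole loop; 302 joins 429/500/504 as retryable status codes, with the same randomized delay replacing the fixed 30 s BAILING_RETRY_DELAY; and exhausting retries now returns an empty string via the loop's else clause instead of raising. A distilled sketch of that control flow (hypothetical names; the real code catches requests.exceptions.ConnectionError and sends through a session):

import random
import time

def post_with_retry(send, max_retries=3):
    """Distilled version of the retry loop above; `send` is a
    zero-argument callable performing the HTTP request (assumed)."""
    retry_num = 0
    while retry_num < max_retries:
        try:
            response = send()
        except ConnectionError:
            # Transient network failure: back off 10-30 s, spend one attempt.
            time.sleep(random.randint(10, 30))
            retry_num += 1
            continue
        if response.status_code == 200:
            break  # success
        elif response.status_code == 426:
            retry_num += 1  # retried immediately, no backoff
        elif response.status_code in (302, 429, 500, 504):
            time.sleep(random.randint(10, 30))  # randomized backoff
            retry_num += 1
        else:
            raise ValueError(f'Status code = {response.status_code}')
    else:
        # while-else: the condition went false without a break,
        # i.e. retries are exhausted; the real code returns '' here.
        return None
    return response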
0 commit comments
