Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ click
pyyaml
psutil>=5.0.0
nvidia-ml-py
boto3>=1.35.49
botocore
pydantic~=2.0
pyecharts>=2.0.0
wrapt>=1.17.0
Expand Down
6 changes: 0 additions & 6 deletions swanlab/api/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from swanlab.api.base import ApiBase, ApiHTTP
from swanlab.api.types import ApiResponse, Experiment, Pagination
from swanlab.package import get_host_api

try:
from pandas import DataFrame
Expand Down Expand Up @@ -181,11 +180,6 @@ def get_metrics(
continue

url:str = resp.data.get("url", "")
# 私有化环境可能不会携带 ip:https://github.com/SwanHubX/SwanLab/issues/1267
if not (url.startswith('https://') or url.startswith('http://')):
url = get_host_api().split('/api')[0] + url # url 已添加前缀 /


df = pd.read_csv(url, index_col=0)

if idx == 0:
Expand Down
1 change: 0 additions & 1 deletion swanlab/core_python/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,3 @@
"""

from .client import *
from .session import create_session
38 changes: 38 additions & 0 deletions swanlab/core_python/api/experiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
@author: cunyue
@file: experiment.py
@time: 2025/12/11 18:37
@description: 定义实验相关的后端API接口
"""

from typing import Literal

from swanlab.core_python.client import Client


def update_experiment_state(
    client: Client,
    *,
    username: str,
    projname: str,
    cuid: str,
    state: Literal['FINISHED', 'CRASHED', 'ABORTED'],
    finished_at: str = None,
):
    """
    Report the final state of an experiment to the backend.

    After the request has been sent, ``client.pending`` is set to True to mark
    that the experiment has ended.
    :param client: logged-in client instance
    :param username: name of the user who owns the experiment
    :param projname: name of the project the experiment belongs to
    :param cuid: unique identifier of the experiment
    :param state: final state of the experiment
    :param finished_at: end time of the experiment in ISO 8601 format; when omitted the
        field is not sent at all (the current time is then used - presumably by the
        backend, TODO confirm)
    """
    # Build the request body field by field, leaving out every field whose value is None
    payload = {}
    for field, value in (("state", state), ("finishedAt", finished_at), ("from", "sdk")):
        if value is not None:
            payload[field] = value
    endpoint = f"/project/{username}/{projname}/runs/{cuid}/state"
    client.put(endpoint, payload)
    # Only reached when the PUT call did not raise
    client.pending = True
92 changes: 92 additions & 0 deletions swanlab/core_python/api/service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""
@author: cunyue
@file: service.py
@time: 2025/12/11 18:48
@description: 服务相关API接口
"""

import time
from concurrent.futures import ThreadPoolExecutor
from io import BytesIO
from typing import List, Tuple

import requests
from requests.exceptions import RequestException

from ..client import Client
from ...log import swanlog
from ...toolkit.models.data import MediaBuffer


def upload_file(*, url: str, buffer: BytesIO, max_retries=3):
    """
    Upload the content of an in-memory buffer to COS with an HTTP PUT request.

    A failed attempt is retried after an exponential back-off (1s, 2s, 4s, ...).
    :param url: COS upload URL
    :param buffer: BytesIO object holding the file content
    :param max_retries: maximum number of attempts; values below 1 are treated as 1 so
        the function can never return without having tried to upload
    :raises RequestException: re-raised from the last attempt once every attempt failed
    """
    # Always try at least once: with a non-positive value the loop below would not run
    # and the function would return as if the upload had succeeded.
    attempts = max(1, max_retries)
    # A single Session is shared by all attempts so the TCP connection can be reused
    with requests.Session() as session:
        for attempt in range(1, attempts + 1):
            try:
                # Rewind first: an earlier attempt may already have read from the buffer
                buffer.seek(0)
                response = session.put(
                    url,
                    data=buffer,
                    headers={'Content-Type': 'application/octet-stream'},
                    timeout=30,
                )
                # Treat HTTP error statuses as failures too
                response.raise_for_status()
                return
            except RequestException:
                swanlog.warning("Upload attempt {} failed for URL: {}".format(attempt, url))
                # Last attempt: propagate the error to the caller
                if attempt == attempts:
                    raise
                # Simple exponential back-off (wait 1s, 2s, 4s, ...)
                time.sleep(2 ** (attempt - 1))


def upload_to_cos(client: Client, *, cuid: str, buffers: List[MediaBuffer]):
    """
    Upload media buffers to COS.

    The backend is asked for one upload URL per buffer, the buffers are uploaded
    concurrently in a thread pool, and every upload that failed (or could not be
    submitted to the pool) is retried once more sequentially. Upload failures are
    logged, not raised.
    :param client: client instance used to request the upload URLs
    :param cuid: experiment cuid
    :param buffers: media data buffers; ``file_name`` of each buffer is sent to the backend
    :raises AssertionError: if the backend returns a different number of URLs than buffers
    """
    # 1. Let the backend sign one upload URL per file name
    data, _ = client.post(
        '/resources/presigned/put',
        {"experimentId": cuid, "paths": [buffer.file_name for buffer in buffers]},
    )
    urls: List[str] = data['urls']
    # Checked with an explicit raise rather than `assert`, which is removed under
    # `python -O`; the exception type and message stay the same for existing callers.
    if len(urls) != len(buffers):
        raise AssertionError("URLs and buffers length mismatch")
    failed_buffers: List[Tuple[str, MediaBuffer]] = []
    # 2. Concurrent upload
    # executor.submit may fail because the number of threads is limited or the pool has
    # already been shut down (see https://github.com/SwanHubX/SwanLab/issues/889);
    # such buffers are queued for the sequential retry below.
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = []
        # 2.1 Submit every upload to the pool
        for url, buffer in zip(urls, buffers):
            try:
                future = executor.submit(upload_file, url=url, buffer=buffer)
            except RuntimeError:
                failed_buffers.append((url, buffer))
            else:
                futures.append((future, url, buffer))
        # 2.2 Collect the results
        for future, url, buffer in futures:
            try:
                future.result()
            except Exception as e:
                swanlog.warning(f"Failed to upload {url}: {e}, will retry...")
                failed_buffers.append((url, buffer))
    # 3. Retry the failed buffers one by one in the calling thread
    if failed_buffers:
        swanlog.debug("Retrying failed buffers: {}".format(len(failed_buffers)))
        for url, buffer in failed_buffers:
            try:
                upload_file(url=url, buffer=buffer)
            except Exception as e:
                # Best effort: a buffer that still fails is reported and skipped
                swanlog.error(f"Failed to upload {url}: {e}")
2 changes: 1 addition & 1 deletion swanlab/core_python/auth/providers/api_key.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
from rich.status import Status
from rich.text import Text

from swanlab.core_python.client.session import create_session
from swanlab.env import is_windows, is_interactive
from swanlab.error import ValidationError, APIKeyFormatError, KeyFileError
from swanlab.log import swanlog
from swanlab.package import get_setting_url, get_host_api, get_host_web, fmt_web_host, save_key as sk, get_key
from ...session import create_session


class LoginInfo:
Expand Down
Loading