Skip to content

Commit 7afb48b

Browse files
Fix volume auto-cleanup locking and mocking issues
1 parent 076b211 commit 7afb48b

File tree

2 files changed

+44
-28
lines changed

2 files changed

+44
-28
lines changed

src/dstack/_internal/server/background/tasks/process_idle_volumes.py

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
11
import datetime
22
from typing import List
33

4-
from sqlalchemy import select
4+
from sqlalchemy import select, update
55
from sqlalchemy.ext.asyncio import AsyncSession
6+
from sqlalchemy.orm import selectinload
67

78
from dstack._internal.core.models.profiles import parse_duration
89
from dstack._internal.core.models.volumes import VolumeStatus
910
from dstack._internal.server.db import get_session_ctx
1011
from dstack._internal.server.models import VolumeModel
1112
from dstack._internal.server.services.locking import get_locker
12-
from dstack._internal.server.services.volumes import delete_volumes, get_volume_configuration
13+
from dstack._internal.server.services.volumes import get_volume_configuration
1314
from dstack._internal.utils.common import get_current_datetime
1415
from dstack._internal.utils.logging import get_logger
1516

@@ -19,9 +20,10 @@
1920
async def process_idle_volumes():
2021
lock, lockset = get_locker().get_lockset(VolumeModel.__tablename__)
2122
async with get_session_ctx() as session:
23+
# Take lock, select IDs, add to lockset, release lock
2224
async with lock:
2325
res = await session.execute(
24-
select(VolumeModel)
26+
select(VolumeModel.id)
2527
.where(
2628
VolumeModel.status == VolumeStatus.ACTIVE,
2729
VolumeModel.deleted == False,
@@ -31,12 +33,21 @@ async def process_idle_volumes():
3133
.limit(10)
3234
.with_for_update(skip_locked=True)
3335
)
34-
volumes = list(res.unique().scalars().all())
35-
if not volumes:
36+
volume_ids = list(res.scalars().all())
37+
if not volume_ids:
3638
return
37-
for volume in volumes:
38-
await session.refresh(volume, ["project", "attachments"])
39-
lockset.add(volume.id)
39+
for volume_id in volume_ids:
40+
lockset.add(volume_id)
41+
42+
# Load volumes with related attributes in one query
43+
res = await session.execute(
44+
select(VolumeModel)
45+
.where(VolumeModel.id.in_(volume_ids))
46+
.options(selectinload(VolumeModel.project))
47+
.options(selectinload(VolumeModel.attachments))
48+
.execution_options(populate_existing=True)
49+
)
50+
volumes = list(res.unique().scalars().all())
4051

4152
try:
4253
to_delete = []
@@ -45,11 +56,11 @@ async def process_idle_volumes():
4556
to_delete.append(volume)
4657

4758
if to_delete:
48-
await _delete_volumes(session, to_delete)
59+
await _delete_idle_volumes(session, to_delete)
4960

5061
finally:
51-
for volume in volumes:
52-
lockset.discard(volume.id)
62+
for volume_id in volume_ids:
63+
lockset.discard(volume_id)
5364

5465

5566
def _should_delete_volume(volume: VolumeModel) -> bool:
@@ -89,16 +100,21 @@ def _get_idle_time(volume: VolumeModel) -> datetime.timedelta:
89100
return max(idle_time, datetime.timedelta(0))
90101

91102

92-
async def _delete_volumes(session: AsyncSession, volumes: List[VolumeModel]):
93-
by_project = {}
103+
async def _delete_idle_volumes(session: AsyncSession, volumes: List[VolumeModel]):
104+
"""Delete idle volumes without using the delete_volumes function to avoid locking conflicts."""
94105
for volume in volumes:
95-
project = volume.project
96-
if project not in by_project:
97-
by_project[project] = []
98-
by_project[project].append(volume.name)
99-
100-
for project, names in by_project.items():
101106
try:
102-
await delete_volumes(session, project, names)
107+
# Mark volume as deleted
108+
await session.execute(
109+
update(VolumeModel)
110+
.where(VolumeModel.id == volume.id)
111+
.values(
112+
deleted=True,
113+
deleted_at=get_current_datetime(),
114+
)
115+
)
116+
logger.info("Marked idle volume %s for deletion", volume.name)
103117
except Exception:
104-
logger.exception("Failed to delete volumes for project %s", project.name)
118+
logger.exception("Failed to mark volume %s for deletion", volume.name)
119+
120+
await session.commit()

src/tests/_internal/server/background/tasks/test_process_idle_volumes.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import datetime
2-
from unittest.mock import patch
32

43
import pytest
54
from sqlalchemy.ext.asyncio import AsyncSession
@@ -155,9 +154,10 @@ async def test_integration(self, test_db, session: AsyncSession):
155154
).replace(tzinfo=None)
156155
await session.commit()
157156

158-
with patch(
159-
"dstack._internal.server.background.tasks.process_idle_volumes.delete_volumes"
160-
) as mock:
161-
await process_idle_volumes()
162-
mock.assert_called_once()
163-
assert mock.call_args[0][2] == ["test-volume"]
157+
# Run the background task
158+
await process_idle_volumes()
159+
160+
# Refresh the volume to see if it was marked as deleted
161+
await session.refresh(volume)
162+
assert volume.deleted is True
163+
assert volume.deleted_at is not None

0 commit comments

Comments
 (0)