Skip to content

Commit b243ec6

Browse files
committed
add iommu group
1 parent 792080e commit b243ec6

File tree

11 files changed

+101
-68
lines changed

11 files changed

+101
-68
lines changed

cloud/blockstore/libs/nvme/nvme.h

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -21,13 +21,16 @@ struct TControllerData
2121
ui64 Capacity = 0;
2222
};
2323

24-
struct TPCIAddress
24+
struct TPCIDeviceInfo
2525
{
2626
ui16 VendorId = 0;
2727
ui16 DeviceId = 0;
28+
2829
TString Address;
2930

30-
bool operator == (const TPCIAddress&) const = default;
31+
std::optional<ui32> IOMMUGroup;
32+
33+
[[nodiscard]] bool operator == (const TPCIDeviceInfo&) const = default;
3134
[[nodiscard]] explicit operator bool () const
3235
{
3336
return VendorId != 0 && DeviceId != 0 && !Address.empty();
@@ -52,13 +55,13 @@ struct INvmeManager
5255
virtual TResultOrError<TString> GetSerialNumber(const TString& path) = 0;
5356
virtual TResultOrError<TVector<TControllerData>> ListControllers() = 0;
5457

55-
virtual TResultOrError<TPCIAddress> GetPCIAddress(
58+
virtual TResultOrError<TPCIDeviceInfo> GetPCIDeviceInfo(
5659
const TString& devicePath) = 0;
5760

58-
virtual TResultOrError<TString> GetDriverName(const TPCIAddress& pci) = 0;
61+
virtual TResultOrError<TString> GetDriverName(const TPCIDeviceInfo& pci) = 0;
5962

60-
virtual NProto::TError BindToVFIO(const TPCIAddress& pci) = 0;
61-
virtual NProto::TError BindToNVME(const TPCIAddress& pci) = 0;
63+
virtual NProto::TError BindToVFIO(const TPCIDeviceInfo& pci) = 0;
64+
virtual NProto::TError BindToNVME(const TPCIDeviceInfo& pci) = 0;
6265
};
6366

6467
////////////////////////////////////////////////////////////////////////////////

cloud/blockstore/libs/nvme/nvme_linux.cpp

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -351,11 +351,11 @@ class TNvmeManager final
351351
});
352352
}
353353

354-
TResultOrError<TPCIAddress> GetPCIAddress(const TString& devicePath) final
354+
TResultOrError<TPCIDeviceInfo> GetPCIDeviceInfo(const TString& devicePath) final
355355
{
356356
namespace NFs = std::filesystem;
357357

358-
return SafeExecute<TResultOrError<TPCIAddress>>(
358+
return SafeExecute<TResultOrError<TPCIDeviceInfo>>(
359359
[&]
360360
{
361361
const auto filename = NFs::path{devicePath.c_str()}.filename();
@@ -366,15 +366,24 @@ class TNvmeManager final
366366
const auto base = NFs::path{"/sys/bus/pci/devices"} /
367367
std::string_view{address};
368368

369-
return TPCIAddress{
369+
std::optional<ui32> iommuGroup;
370+
if (NFs::exists(base / "iommu_group")) {
371+
iommuGroup = std::stoi(
372+
NFs::read_symlink(base / "iommu_group")
373+
.filename()
374+
.string());
375+
}
376+
377+
return TPCIDeviceInfo{
370378
.VendorId = ReadFileHex(base / "vendor"),
371379
.DeviceId = ReadFileHex(base / "device"),
372380
.Address = std::move(address),
381+
.IOMMUGroup = iommuGroup,
373382
};
374383
});
375384
}
376385

377-
TResultOrError<TString> GetDriverName(const TPCIAddress& pci) final
386+
TResultOrError<TString> GetDriverName(const TPCIDeviceInfo& pci) final
378387
{
379388
namespace NFs = std::filesystem;
380389

@@ -399,7 +408,7 @@ class TNvmeManager final
399408
});
400409
}
401410

402-
NProto::TError BindToVFIO(const TPCIAddress& pci) final
411+
NProto::TError BindToVFIO(const TPCIDeviceInfo& pci) final
403412
{
404413
if (!pci) {
405414
return MakeError(E_ARGUMENT, "empty PCI address");
@@ -411,6 +420,8 @@ class TNvmeManager final
411420
{
412421
TFileOutput out("/sys/bus/pci/drivers/vfio-pci/new_id");
413422
out << Hex(pci.VendorId, {}) << " " << Hex(pci.DeviceId, {});
423+
424+
return NProto::TError();
414425
});
415426

416427
if (HasError(error)) {
@@ -420,7 +431,7 @@ class TNvmeManager final
420431
return RebindDriver(pci, NVMeDriverName, VFIODriverName);
421432
}
422433

423-
NProto::TError BindToNVME(const TPCIAddress& pci) final
434+
NProto::TError BindToNVME(const TPCIDeviceInfo& pci) final
424435
{
425436
if (!pci) {
426437
return MakeError(E_ARGUMENT, "empty PCI address");
@@ -430,7 +441,7 @@ class TNvmeManager final
430441
}
431442

432443
auto RebindDriver(
433-
const TPCIAddress& pci,
444+
const TPCIDeviceInfo& pci,
434445
const TString& from,
435446
const TString& to) -> NProto::TError
436447
{

cloud/blockstore/libs/nvme/nvme_stub.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -61,33 +61,33 @@ class TNvmeManagerStub final
6161
return IsDeviceSsd;
6262
}
6363

64-
TResultOrError<TPCIAddress> GetPCIAddress(const TString& path) final
64+
TResultOrError<TPCIDeviceInfo> GetPCIDeviceInfo(const TString& path) final
6565
{
6666
Y_UNUSED(path);
6767

68-
return TPCIAddress{};
68+
return TPCIDeviceInfo{};
6969
}
7070

7171
TResultOrError<TVector<TControllerData>> ListControllers() final
7272
{
7373
return TVector<TControllerData>{};
7474
}
7575

76-
TResultOrError<TString> GetDriverName(const TPCIAddress& pci) final
76+
TResultOrError<TString> GetDriverName(const TPCIDeviceInfo& pci) final
7777
{
7878
Y_UNUSED(pci);
7979

8080
return TString{};
8181
}
8282

83-
NProto::TError BindToVFIO(const TPCIAddress& pci) final
83+
NProto::TError BindToVFIO(const TPCIDeviceInfo& pci) final
8484
{
8585
Y_UNUSED(pci);
8686

8787
return {};
8888
}
8989

90-
NProto::TError BindToNVME(const TPCIAddress& pci) final
90+
NProto::TError BindToNVME(const TPCIDeviceInfo& pci) final
9191
{
9292
Y_UNUSED(pci);
9393

cloud/blockstore/libs/service_local/safe_deallocator_ut.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -70,33 +70,33 @@ struct TTestNvmeManager final: NNvme::INvmeManager
7070
return TString("serial");
7171
}
7272

73-
TResultOrError<TPCIAddress> GetPCIAddress(const TString& path) final
73+
TResultOrError<TPCIDeviceInfo> GetPCIDeviceInfo(const TString& path) final
7474
{
7575
Y_UNUSED(path);
7676

77-
return TPCIAddress{};
77+
return TPCIDeviceInfo{};
7878
}
7979

8080
TResultOrError<TVector<TControllerData>> ListControllers() final
8181
{
8282
return TVector<TControllerData>{};
8383
}
8484

85-
TResultOrError<TString> GetDriverName(const TPCIAddress& pci) final
85+
TResultOrError<TString> GetDriverName(const TPCIDeviceInfo& pci) final
8686
{
8787
Y_UNUSED(pci);
8888

8989
return TString{};
9090
}
9191

92-
NProto::TError BindToVFIO(const TPCIAddress& pci) final
92+
NProto::TError BindToVFIO(const TPCIDeviceInfo& pci) final
9393
{
9494
Y_UNUSED(pci);
9595

9696
return {};
9797
}
9898

99-
NProto::TError BindToNVME(const TPCIAddress& pci) final
99+
NProto::TError BindToNVME(const TPCIDeviceInfo& pci) final
100100
{
101101
Y_UNUSED(pci);
102102

cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_monitoring.cpp

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,12 @@
22

33
#include <cloud/blockstore/libs/rdma/iface/server.h>
44
#include <cloud/blockstore/libs/storage/disk_common/monitoring_utils.h>
5+
56
#include <cloud/storage/core/libs/common/format.h>
67

78
#include <library/cpp/monlib/service/pages/templates.h>
89

10+
#include <util/stream/format.h>
911
#include <util/stream/str.h>
1012

1113
namespace NCloud::NBlockStore::NStorage {
@@ -89,6 +91,7 @@ void TDiskAgentActor::RenderNVMeDevices(IOutputStream& out) const
8991
TABLEH() { out << "Model"; }
9092
TABLEH() { out << "Capacity"; }
9193
TABLEH() { out << "PCI"; }
94+
TABLEH() { out << "IOMMU group"; }
9295
}
9396

9497
for (const auto& d: devices) {
@@ -100,10 +103,15 @@ void TDiskAgentActor::RenderNVMeDevices(IOutputStream& out) const
100103
<< d.GetCapacity() << " B)";
101104
}
102105
TABLED () {
103-
out << d.GetPCIVendorId() << ":"
104-
<< d.GetPCIDeviceId() << " "
106+
out << "(" << Hex(d.GetPCIVendorId()) << " "
107+
<< Hex(d.GetPCIDeviceId()) << ") "
105108
<< d.GetPCIAddress();
106109
}
110+
TABLED () {
111+
if (d.HasIOMMUGroup()) {
112+
out << d.GetIOMMUGroup();
113+
}
114+
}
107115
}
108116
}
109117
}

cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_ut.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -118,33 +118,33 @@ struct TTestNvmeManager
118118
return true;
119119
}
120120

121-
TResultOrError<NNvme::TPCIAddress> GetPCIAddress(const TString& path) final
121+
TResultOrError<NNvme::TPCIDeviceInfo> GetPCIDeviceInfo(const TString& path) final
122122
{
123123
Y_UNUSED(path);
124124

125-
return NNvme::TPCIAddress{};
125+
return NNvme::TPCIDeviceInfo{};
126126
}
127127

128128
TResultOrError<TVector<NNvme::TControllerData>> ListControllers() final
129129
{
130130
return TVector<NNvme::TControllerData>{};
131131
}
132132

133-
TResultOrError<TString> GetDriverName(const NNvme::TPCIAddress& pci) final
133+
TResultOrError<TString> GetDriverName(const NNvme::TPCIDeviceInfo& pci) final
134134
{
135135
Y_UNUSED(pci);
136136

137137
return TString{};
138138
}
139139

140-
NProto::TError BindToVFIO(const NNvme::TPCIAddress& pci) final
140+
NProto::TError BindToVFIO(const NNvme::TPCIDeviceInfo& pci) final
141141
{
142142
Y_UNUSED(pci);
143143

144144
return {};
145145
}
146146

147-
NProto::TError BindToNVME(const NNvme::TPCIAddress& pci) final
147+
NProto::TError BindToNVME(const NNvme::TPCIDeviceInfo& pci) final
148148
{
149149
Y_UNUSED(pci);
150150

cloud/blockstore/libs/storage/disk_agent/disk_agent_state_ut.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -95,33 +95,33 @@ struct TTestNvmeManager
9595
return true;
9696
}
9797

98-
TResultOrError<NNvme::TPCIAddress> GetPCIAddress(const TString& path) final
98+
TResultOrError<NNvme::TPCIDeviceInfo> GetPCIDeviceInfo(const TString& path) final
9999
{
100100
Y_UNUSED(path);
101101

102-
return NNvme::TPCIAddress{};
102+
return NNvme::TPCIDeviceInfo{};
103103
}
104104

105105
TResultOrError<TVector<NNvme::TControllerData>> ListControllers() final
106106
{
107107
return TVector<NNvme::TControllerData>{};
108108
}
109109

110-
TResultOrError<TString> GetDriverName(const NNvme::TPCIAddress& pci) final
110+
TResultOrError<TString> GetDriverName(const NNvme::TPCIDeviceInfo& pci) final
111111
{
112112
Y_UNUSED(pci);
113113

114114
return TString{};
115115
}
116116

117-
NProto::TError BindToVFIO(const NNvme::TPCIAddress& pci) final
117+
NProto::TError BindToVFIO(const NNvme::TPCIDeviceInfo& pci) final
118118
{
119119
Y_UNUSED(pci);
120120

121121
return {};
122122
}
123123

124-
NProto::TError BindToNVME(const NNvme::TPCIAddress& pci) final
124+
NProto::TError BindToNVME(const NNvme::TPCIDeviceInfo& pci) final
125125
{
126126
Y_UNUSED(pci);
127127

cloud/blockstore/libs/storage/disk_agent/model/nvme_device_list.cpp

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -46,12 +46,19 @@ struct TBySerialNumber
4646

4747
////////////////////////////////////////////////////////////////////////////////
4848

49-
NNvme::TPCIAddress GetPCIAddress(const NProto::TNVMeDevice& d)
49+
NNvme::TPCIDeviceInfo GetPCIDeviceInfo(const NProto::TNVMeDevice& d)
5050
{
51+
std::optional<ui32> iommuGroup;
52+
if (d.HasIOMMUGroup()) {
53+
iommuGroup = d.GetIOMMUGroup();
54+
}
55+
5156
return {
5257
.VendorId = static_cast<ui16>(d.GetPCIVendorId()),
5358
.DeviceId = static_cast<ui16>(d.GetPCIDeviceId()),
54-
.Address = d.GetPCIAddress()};
59+
.Address = d.GetPCIAddress(),
60+
.IOMMUGroup = iommuGroup,
61+
};
5562
}
5663

5764
////////////////////////////////////////////////////////////////////////////////
@@ -167,7 +174,7 @@ void TNVMeDeviceList::DiscoverNVMeDevices()
167174
TBySerialNumber(spec.GetSerialNumber()));
168175

169176
if (device) {
170-
auto [pci, error] = NVMeManager->GetPCIAddress(device->DevicePath);
177+
auto [pci, error] = NVMeManager->GetPCIDeviceInfo(device->DevicePath);
171178
if (HasError(error)) {
172179
STORAGE_ERROR(
173180
"Can't get PCIe address for NVMe device "
@@ -181,8 +188,8 @@ void TNVMeDeviceList::DiscoverNVMeDevices()
181188
"Found NVMe device "
182189
<< spec.GetSerialNumber().Quote() << ": " << device->ModelNumber
183190
<< " " << FormatByteSize(device->Capacity) << " ("
184-
<< Hex(pci.VendorId) << ":" << Hex(pci.DeviceId) << " "
185-
<< pci.Address << ")");
191+
<< Hex(pci.VendorId) << " " << Hex(pci.DeviceId) << " "
192+
<< pci.Address << " " << pci.IOMMUGroup << ")");
186193

187194
NProto::TNVMeDevice& d = NVMeDevices.emplace_back();
188195
d.SetSerialNumber(device->SerialNumber);
@@ -191,6 +198,9 @@ void TNVMeDeviceList::DiscoverNVMeDevices()
191198
d.SetPCIVendorId(pci.VendorId);
192199
d.SetPCIDeviceId(pci.DeviceId);
193200
d.SetPCIAddress(pci.Address);
201+
if (pci.IOMMUGroup) {
202+
d.SetIOMMUGroup(pci.IOMMUGroup.value());
203+
}
194204

195205
continue;
196206
}
@@ -208,7 +218,7 @@ void TNVMeDeviceList::DiscoverNVMeDevices()
208218
// should be bound to VFIO driver
209219

210220
auto [driver, error] =
211-
NVMeManager->GetDriverName(GetPCIAddress(*cachedDeviceIt));
221+
NVMeManager->GetDriverName(GetPCIDeviceInfo(*cachedDeviceIt));
212222

213223
if (HasError(error)) {
214224
STORAGE_WARN(
@@ -260,7 +270,7 @@ NProto::TError TNVMeDeviceList::BindNVMeDeviceToVFIO(
260270
return MakeError(E_NOT_FOUND, "NVMe device not found");
261271
}
262272

263-
return NVMeManager->BindToVFIO(GetPCIAddress(*device));
273+
return NVMeManager->BindToVFIO(GetPCIDeviceInfo(*device));
264274
}
265275

266276
NProto::TError TNVMeDeviceList::ResetNVMeDevice(const TString& serialNumber)
@@ -270,7 +280,7 @@ NProto::TError TNVMeDeviceList::ResetNVMeDevice(const TString& serialNumber)
270280
return MakeError(E_NOT_FOUND, "NVMe device not found");
271281
}
272282

273-
auto error = NVMeManager->BindToNVME(GetPCIAddress(*device));
283+
auto error = NVMeManager->BindToNVME(GetPCIDeviceInfo(*device));
274284
if (HasError(error)) {
275285
STORAGE_ERROR(
276286
"Can't bind NVMe device "

0 commit comments

Comments
 (0)