Skip to content

Commit 71a9977

Browse files
committed
opal/mca/common: match efa nic by device name
EFA devices do not have a SysImageGUID or NodeGUID, and therefore cannot be correctly matched with those identifiers. As a result, EFA devices are selected round-robin regardless of their affinity to the running process. This patch introduces a logical branch that matches EFA NICs by device name. With this change, the EFA NIC nearest to the running process is selected.
Signed-off-by: Wenduo Wang <wenduwan@amazon.com>
1 parent 5ea8638 commit 71a9977

File tree

1 file changed

+49
-36
lines changed

1 file changed

+49
-36
lines changed

opal/mca/common/ofi/common_ofi.c

Lines changed: 49 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,20 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
589589
}
590590

591591
#if OPAL_OFI_PCI_DATA_AVAILABLE
592+
/**
 * Check whether a domain name matches any device in a distance list by
 * case-insensitive name prefix.
 *
 * Only the first MIN(strlen(osname), strlen(domain_name)) characters are
 * compared, so an entry matches when its osname is a prefix of domain_name
 * or domain_name is a prefix of its osname.
 *
 * @param distances      (IN) array of device distance entries to search
 * @param num_distances  (IN) number of entries in distances
 * @param domain_name    (IN) name to compare against each entry's osname
 *
 * @return true if at least one entry matches; false otherwise, including
 *         when distances or domain_name is NULL or domain_name is empty
 */
static bool match_device_by_name_prefix(pmix_device_distance_t *distances, int num_distances, char *domain_name)
{
    const char *dev_name = NULL;
    size_t domain_len, dev_len;

    /* A NULL name cannot be measured, and an empty one would "match" every
     * device because strncasecmp() over zero characters always returns 0.
     */
    if (NULL == distances || NULL == domain_name || '\0' == domain_name[0]) {
        return false;
    }

    /* Loop invariant: measure the domain name once */
    domain_len = strlen(domain_name);

    for (int i = 0; i < num_distances; ++i) {
        dev_name = distances[i].osname;
        /* Skip unnamed entries for the same zero-length-compare reason */
        if (NULL == dev_name || '\0' == dev_name[0]) {
            continue;
        }

        dev_len = strlen(dev_name);
        if (0 == strncasecmp(dev_name, domain_name, dev_len < domain_len ? dev_len : domain_len)) {
            return true;
        }
    }

    return false;
}
605+
592606
/**
593607
* Determine if a device is nearest
594608
*
@@ -606,31 +620,19 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
606620
*
607621
*/
608622
#if HWLOC_API_VERSION < 0x00020000
609-
static bool is_near(pmix_device_distance_t *distances,
610-
int num_distances,
611-
hwloc_topology_t topology,
612-
struct fi_pci_attr pci)
623+
static bool match_device_by_pci_bus_id(pmix_device_distance_t *distances, int num_distances,
624+
struct fi_pci_attr pci)
613625
{
614626
return true;
615627
}
616628
#else
617-
static bool is_near(pmix_device_distance_t *distances,
618-
int num_distances,
619-
hwloc_topology_t topology,
620-
struct fi_pci_attr pci)
629+
static bool match_device_by_pci_bus_id(pmix_device_distance_t *distances, int num_distances,
630+
struct fi_pci_attr pci)
621631
{
622632
hwloc_obj_t pcidev, osdev;
623633

624-
/* if we failed to find any distances, then we consider all interfaces
625-
* to be of equal distances and let the caller decide how to handle
626-
* them
627-
*/
628-
if (!distances)
629-
return true;
630-
631-
pcidev = hwloc_get_pcidev_by_busid(topology, pci.domain_id,
632-
pci.bus_id, pci.device_id,
633-
pci.function_id);
634+
pcidev = hwloc_get_pcidev_by_busid(opal_hwloc_topology, pci.domain_id, pci.bus_id,
635+
pci.device_id, pci.function_id);
634636
if (!pcidev)
635637
return false;
636638

@@ -675,8 +677,31 @@ static bool is_near(pmix_device_distance_t *distances,
675677

676678
return false;
677679
}
678-
#endif
679-
#endif // OPAL_OFI_PCI_DATA_AVAILABLE
680+
#endif /* HWLOC_API_VERSION < 0x00020000 */
681+
682+
/**
 * Determine whether the device behind a provider is in the given distance
 * list.
 *
 * Providers named "efa" are matched by domain name, because EFA devices have
 * no SysImageGUID or NodeGUID to match on. Any other provider whose NIC
 * reports a PCI bus is matched by its PCI bus attributes.
 *
 * @param distances      (IN) array of device distance entries, or NULL if
 *                            none could be obtained
 * @param num_distances  (IN) number of entries in distances
 * @param fi_info        (IN) provider to test
 *
 * @return true if distances is NULL or the provider's device is matched;
 *         false otherwise, including when the provider lacks the attributes
 *         needed for matching
 */
static bool is_near(pmix_device_distance_t *distances,
                    int num_distances,
                    struct fi_info *fi_info)
{
    /* if we failed to find any distances, then we consider all interfaces
     * to be of equal distances and let the caller decide how to handle
     * them
     */
    if (!distances) {
        return true;
    }

    if (NULL == fi_info) {
        return false;
    }

    if (NULL != fi_info->fabric_attr && NULL != fi_info->fabric_attr->prov_name
        && 0 == strcasecmp("efa", fi_info->fabric_attr->prov_name)) {
        /* The provider name lives in fabric_attr but the device name lives
         * in domain_attr; without a domain name there is nothing to match.
         */
        if (NULL == fi_info->domain_attr || NULL == fi_info->domain_attr->name) {
            return false;
        }
        return match_device_by_name_prefix(distances, num_distances, fi_info->domain_attr->name);
    }

    if (NULL != fi_info->nic && NULL != fi_info->nic->bus_attr
        && fi_info->nic->bus_attr->bus_type == FI_BUS_PCI) {
        return match_device_by_pci_bus_id(distances, num_distances,
                                          fi_info->nic->bus_attr->attr.pci);
    }

    return false;
}
704+
#endif /* OPAL_OFI_PCI_DATA_AVAILABLE */
680705

681706
/* Count providers returns the number of providers present in an fi_info list
682707
* @param (IN) provider_list struct fi_info* list of providers available
@@ -789,7 +814,6 @@ struct fi_info *opal_common_ofi_select_provider(struct fi_info *provider_list,
789814
#if OPAL_OFI_PCI_DATA_AVAILABLE
790815
pmix_device_distance_t *distances = NULL;
791816
pmix_value_t *pmix_val;
792-
struct fi_pci_attr pci;
793817
int num_distances = 0;
794818
#endif
795819
bool near;
@@ -828,15 +852,10 @@ struct fi_info *opal_common_ofi_select_provider(struct fi_info *provider_list,
828852
/* Cycle through remaining fi_info objects, looking for alike providers */
829853
while (NULL != current_provider) {
830854
if (!check_provider_attr(provider, current_provider)) {
831-
near = false;
832855
#if OPAL_OFI_PCI_DATA_AVAILABLE
833-
if (NULL != current_provider->nic
834-
&& NULL != current_provider->nic->bus_attr
835-
&& current_provider->nic->bus_attr->bus_type == FI_BUS_PCI) {
836-
pci = current_provider->nic->bus_attr->attr.pci;
837-
near = is_near(distances, num_distances,
838-
opal_hwloc_topology, pci);
839-
}
856+
near = is_near(distances, num_distances, current_provider);
857+
#else
858+
near = false;
840859
#endif
841860
/* We could have multiple near providers */
842861
if (near && !provider_found) {
@@ -865,13 +884,7 @@ struct fi_info *opal_common_ofi_select_provider(struct fi_info *provider_list,
865884
}
866885

867886
#if OPAL_OFI_PCI_DATA_AVAILABLE
868-
if (NULL != provider->nic
869-
&& NULL != provider->nic->bus_attr
870-
&& provider->nic->bus_attr->bus_type == FI_BUS_PCI) {
871-
pci = provider->nic->bus_attr->attr.pci;
872-
near = is_near(distances, num_distances,
873-
opal_hwloc_topology, pci);
874-
}
887+
near = is_near(distances, num_distances, provider);
875888
#endif
876889

877890
#if OPAL_ENABLE_DEBUG

0 commit comments

Comments
 (0)