@@ -25,6 +25,7 @@ import (
25
25
batchv1 "k8s.io/api/batch/v1"
26
26
corev1 "k8s.io/api/core/v1"
27
27
schedulingv1 "k8s.io/api/scheduling/v1"
28
+ "k8s.io/apimachinery/pkg/api/resource"
28
29
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29
30
"k8s.io/apimachinery/pkg/runtime"
30
31
"k8s.io/apimachinery/pkg/runtime/schema"
@@ -526,10 +527,7 @@ func TestAllResourcesCreated(t *testing.T) {
526
527
for i := 0 ; i < 5 ; i ++ {
527
528
f .expectCreatePodAction (fmjc .newWorker (mpiJobCopy , i ))
528
529
}
529
- if implementation == kubeflow .MPIImplementationIntel ||
530
- implementation == kubeflow .MPIImplementationMPICH {
531
- f .expectCreateServiceAction (newLauncherService (mpiJobCopy ))
532
- }
530
+ f .expectCreateServiceAction (newLauncherService (mpiJobCopy ))
533
531
f .expectCreateJobAction (fmjc .newLauncherJob (mpiJobCopy ))
534
532
535
533
mpiJobCopy .Status .Conditions = []kubeflow.JobCondition {newCondition (kubeflow .JobCreated , corev1 .ConditionTrue , mpiJobCreatedReason , "MPIJob default/foo is created." )}
@@ -822,10 +820,7 @@ func TestCreateSuspendedMPIJob(t *testing.T) {
822
820
t .Fatalf ("Failed creating secret" )
823
821
}
824
822
f .expectCreateSecretAction (secret )
825
- if implementation == kubeflow .MPIImplementationIntel ||
826
- implementation == kubeflow .MPIImplementationMPICH {
827
- f .expectCreateServiceAction (newLauncherService (mpiJob ))
828
- }
823
+ f .expectCreateServiceAction (newLauncherService (mpiJob ))
829
824
830
825
// expect creation of the launcher
831
826
fmjc := f .newFakeMPIJobController ()
@@ -887,6 +882,7 @@ func TestSuspendedRunningMPIJob(t *testing.T) {
887
882
888
883
// setup objects
889
884
scheme .Scheme .Default (mpiJob )
885
+ f .setUpService (newLauncherService (mpiJob ))
890
886
f .setUpService (newWorkersService (mpiJob ))
891
887
892
888
cfgMap := newConfigMap (mpiJob , replicas )
@@ -991,6 +987,7 @@ func TestResumeMPIJob(t *testing.T) {
991
987
// expect the launcher update to resume it
992
988
launcherCopy := launcher .DeepCopy ()
993
989
launcherCopy .Spec .Suspend = pointer .Bool (false )
990
+ f .expectCreateServiceAction (newLauncherService (mpiJob ))
994
991
f .expectUpdateJobAction (launcherCopy )
995
992
996
993
// expect an update to add the conditions
@@ -1047,6 +1044,7 @@ func TestLauncherActiveWorkerNotReady(t *testing.T) {
1047
1044
configMap := newConfigMap (mpiJobCopy , replicas )
1048
1045
updateDiscoverHostsInConfigMap (configMap , mpiJobCopy , nil )
1049
1046
f .setUpConfigMap (configMap )
1047
+ f .setUpService (newLauncherService (mpiJob ))
1050
1048
f .setUpService (newWorkersService (mpiJobCopy ))
1051
1049
secret , err := newSSHAuthSecret (mpiJobCopy )
1052
1050
if err != nil {
@@ -1097,6 +1095,7 @@ func TestLauncherActiveWorkerReady(t *testing.T) {
1097
1095
1098
1096
mpiJobCopy := mpiJob .DeepCopy ()
1099
1097
scheme .Scheme .Default (mpiJobCopy )
1098
+ f .setUpService (newLauncherService (mpiJob ))
1100
1099
f .setUpService (newWorkersService (mpiJobCopy ))
1101
1100
secret , err := newSSHAuthSecret (mpiJobCopy )
1102
1101
if err != nil {
@@ -1156,6 +1155,7 @@ func TestWorkerReady(t *testing.T) {
1156
1155
1157
1156
mpiJobCopy := mpiJob .DeepCopy ()
1158
1157
scheme .Scheme .Default (mpiJobCopy )
1158
+ f .setUpService (newLauncherService (mpiJob ))
1159
1159
f .setUpService (newWorkersService (mpiJobCopy ))
1160
1160
secret , err := newSSHAuthSecret (mpiJobCopy )
1161
1161
if err != nil {
@@ -1532,20 +1532,71 @@ func TestNewLauncherAndWorker(t *testing.T) {
1532
1532
}
1533
1533
}
1534
1534
1535
+ func newReplicaSpec (name string , cpu string ) * kubeflow.ReplicaSpec {
1536
+ return & kubeflow.ReplicaSpec {
1537
+ Template : corev1.PodTemplateSpec {
1538
+ Spec : corev1.PodSpec {
1539
+ Containers : []corev1.Container {
1540
+ {
1541
+ Name : name ,
1542
+ Resources : corev1.ResourceRequirements {
1543
+ Limits : corev1.ResourceList {
1544
+ corev1 .ResourceCPU : resource .MustParse (cpu ),
1545
+ },
1546
+ },
1547
+ },
1548
+ },
1549
+ },
1550
+ },
1551
+ }
1552
+ }
1553
+
1535
1554
func TestNewConfigMap (t * testing.T ) {
1536
1555
testCases := map [string ]struct {
1537
1556
mpiJob * kubeflow.MPIJob
1538
1557
workerReplicas int32
1539
1558
wantCM * corev1.ConfigMap
1540
1559
}{
1541
- "OpenMPI without slots" : {
1560
+ "OpenMPI without slots, enable launcher as worker " : {
1542
1561
mpiJob : & kubeflow.MPIJob {
1543
1562
ObjectMeta : metav1.ObjectMeta {
1544
1563
Name : "openmpi-without-slots" ,
1545
1564
Namespace : "tenant-a" ,
1546
1565
},
1547
1566
Spec : kubeflow.MPIJobSpec {
1548
1567
MPIImplementation : kubeflow .MPIImplementationOpenMPI ,
1568
+ MPIReplicaSpecs : map [kubeflow.MPIReplicaType ]* kubeflow.ReplicaSpec {
1569
+ kubeflow .MPIReplicaTypeLauncher : newReplicaSpec ("launcher" , "2" ),
1570
+ kubeflow .MPIReplicaTypeWorker : newReplicaSpec ("worker" , "2" ),
1571
+ },
1572
+ },
1573
+ },
1574
+ workerReplicas : 2 ,
1575
+ wantCM : & corev1.ConfigMap {
1576
+ ObjectMeta : metav1.ObjectMeta {
1577
+ Name : "openmpi-without-slots-config" ,
1578
+ Namespace : "tenant-a" ,
1579
+ Labels : map [string ]string {
1580
+ "app" : "openmpi-without-slots" ,
1581
+ },
1582
+ },
1583
+ Data : map [string ]string {
1584
+ "hostfile" : "openmpi-without-slots-launcher.tenant-a.svc slots=1\n openmpi-without-slots-worker-0.openmpi-without-slots-worker.tenant-a.svc slots=1\n openmpi-without-slots-worker-1.openmpi-without-slots-worker.tenant-a.svc slots=1\n " ,
1585
+ },
1586
+ },
1587
+ },
1588
+ "OpenMPI without slots, disable launcher as worker" : {
1589
+ mpiJob : & kubeflow.MPIJob {
1590
+ ObjectMeta : metav1.ObjectMeta {
1591
+ Name : "openmpi-without-slots" ,
1592
+ Namespace : "tenant-a" ,
1593
+ },
1594
+ Spec : kubeflow.MPIJobSpec {
1595
+ MPIImplementation : kubeflow .MPIImplementationOpenMPI ,
1596
+ MPIReplicaSpecs : map [kubeflow.MPIReplicaType ]* kubeflow.ReplicaSpec {
1597
+ kubeflow .MPIReplicaTypeLauncher : newReplicaSpec ("launcher" , "1" ),
1598
+ kubeflow .MPIReplicaTypeWorker : newReplicaSpec ("worker" , "2" ),
1599
+ },
1549
1600
},
1550
1601
},
1551
1602
workerReplicas : 2 ,
@@ -1571,6 +1622,10 @@ func TestNewConfigMap(t *testing.T) {
1571
1622
Spec : kubeflow.MPIJobSpec {
1572
1623
SlotsPerWorker : pointer .Int32 (10 ),
1573
1624
MPIImplementation : kubeflow .MPIImplementationIntel ,
1625
+ MPIReplicaSpecs : map [kubeflow.MPIReplicaType ]* kubeflow.ReplicaSpec {
1626
+ kubeflow .MPIReplicaTypeLauncher : newReplicaSpec ("launcher" , "1" ),
1627
+ kubeflow .MPIReplicaTypeWorker : newReplicaSpec ("worker" , "2" ),
1628
+ },
1574
1629
},
1575
1630
},
1576
1631
workerReplicas : 1 ,
@@ -1596,6 +1651,10 @@ func TestNewConfigMap(t *testing.T) {
1596
1651
Spec : kubeflow.MPIJobSpec {
1597
1652
SlotsPerWorker : pointer .Int32 (10 ),
1598
1653
MPIImplementation : kubeflow .MPIImplementationMPICH ,
1654
+ MPIReplicaSpecs : map [kubeflow.MPIReplicaType ]* kubeflow.ReplicaSpec {
1655
+ kubeflow .MPIReplicaTypeLauncher : newReplicaSpec ("launcher" , "1" ),
1656
+ kubeflow .MPIReplicaTypeWorker : newReplicaSpec ("worker" , "2" ),
1657
+ },
1599
1658
},
1600
1659
},
1601
1660
workerReplicas : 1 ,
0 commit comments