@@ -36,8 +36,8 @@ Yes, this will be a complement to ray core's ability to flexibly schedule actors
36
36
37
37
Actor affinity/anti-affinity schedule API Design
38
38
1 . Scheduling Strategy adds an ActorAffinitySchedulingStrategy.
39
- 2 . This strategy consists of several ActorAffinityMatchExpressions .
40
- 3 . ActorAffinityMatchExpression has 4 match types which are IN/NOT_IN/EXISTS/DOES_NOT_EXIST
39
+ 2. This strategy consists of several LabelMatchExpressions.
40
+ 3. LabelMatchExpression has 4 match types: IN / NOT_IN / EXISTS / DOES_NOT_EXIST.
41
41
42
42
Use Case | ActorAffinityOperator
43
43
-- | --
@@ -72,10 +72,10 @@ SchedulingStrategyT = Union[None, str,
72
72
ActorAffinitySchedulingStrategy]
73
73
74
74
class ActorAffinitySchedulingStrategy :
75
- def __init__ (self , match_expressions : List[ActorAffinityMatchExpression ]):
75
+ def __init__ (self , match_expressions : List[LabelMatchExpression ]):
76
76
self .match_expressions = match_expressions
77
77
78
- class ActorAffinityMatchExpression :
78
+ class LabelMatchExpression :
79
79
""" An expression used to represent an actor's affinity.
80
80
Attributes:
81
81
key: the key of label
@@ -84,7 +84,7 @@ class ActorAffinityMatchExpression:
84
84
values: a list of label value
85
85
soft: ...
86
86
"""
87
- def __init__ (self , key : str , operator : ActorAffinityOperator ,
87
+ def __init__ (self , key : str , operator : LabelMatchOperator ,
88
88
values : List[str ], soft : bool ):
89
89
self .key = key
90
90
self .operator = operator
@@ -106,7 +106,7 @@ Step 2: Set actor affinity strategy.
106
106
1. The target actor is expected to be scheduled with the actors whose label key is "location" and whose value is in ["dc_1"].
107
107
``` python
108
108
match_expressions = [
109
- ActorAffinityMatchExpression (" location" , ActorAffinityOperator .IN , [" dc_1" ], False )
109
+ LabelMatchExpression (" location" , LabelMatchOperator .IN , [" dc_1" ], False )
110
110
]
111
111
actor_affinity_strategy = ActorAffinitySchedulingStrategy(match_expressions)
112
112
actor = Actor.options(scheduling_strategy = actor_affinity_strategy).remote()
@@ -116,7 +116,7 @@ actor = Actor.options(scheduling_strategy = actor_affinity_strategy).remote()
116
116
with the actors whose label key is "location" and value in [ "dc-1"] .
117
117
``` python
118
118
match_expressions = [
119
- ActorAffinityMatchExpression (" location" , ActorAffinityOperator .NOT_IN , [" dc_1" ], False )
119
+ LabelMatchExpression (" location" , LabelMatchOperator .NOT_IN , [" dc_1" ], False )
120
120
]
121
121
actor_affinity_strategy = ActorAffinitySchedulingStrategy(match_expressions)
122
122
actor = Actor.options(scheduling_strategy = actor_affinity_strategy).remote()
@@ -125,7 +125,7 @@ actor = Actor.options(scheduling_strategy = actor_affinity_strategy).remote()
125
125
3. The target actor is expected to be scheduled with the actors that have the label key "location".
126
126
``` python
127
127
match_expressions = [
128
- ActorAffinityMatchExpression (" location" , ActorAffinityOperator .EXISTS , [], False )
128
+ LabelMatchExpression (" location" , LabelMatchOperator .EXISTS , [], False )
129
129
]
130
130
actor_affinity_strategy = ActorAffinitySchedulingStrategy(match_expressions)
131
131
actor = Actor.options(scheduling_strategy = actor_affinity_strategy).remote()
@@ -134,7 +134,7 @@ actor = Actor.options(scheduling_strategy = actor_affinity_strategy).remote()
134
134
4. The target actor is not expected to be scheduled with the actors that have the label key "location".
135
135
``` python
136
136
match_expressions = [
137
- ActorAffinityMatchExpression (" location" , ActorAffinityOperator.DOES_NOT_EXIST , [], False )
137
+ LabelMatchExpression (" location" , LabelMatchOperator.DOES_NOT_EXIST , [], False )
138
138
]
139
139
actor_affinity_strategy = ActorAffinitySchedulingStrategy(match_expressions)
140
140
actor = Actor.options(scheduling_strategy = actor_affinity_strategy).remote()
@@ -143,8 +143,8 @@ actor = Actor.options(scheduling_strategy = actor_affinity_strategy).remote()
143
143
5. You can also set multiple expressions at the same time; all of them must be satisfied when scheduling.
144
144
``` python
145
145
match_expressions = [
146
- ActorAffinityMatchExpression (" location" , ActorAffinityOperator .DOES_NOT_EXIST , [], False ),
147
- ActorAffinityMatchExpression (" version" , ActorAffinityOperator .EXISTS , [], False )
146
+ LabelMatchExpression (" location" , LabelMatchOperator .DOES_NOT_EXIST , [], False ),
147
+ LabelMatchExpression (" version" , LabelMatchOperator .EXISTS , [], False )
148
148
]
149
149
actor_affinity_strategy = ActorAffinitySchedulingStrategy(match_expressions)
150
150
actor = Actor.options(scheduling_strategy = actor_affinity_strategy).remote()
@@ -181,70 +181,70 @@ Set the labels for this actor API
181
181
Actor affinity scheduling strategy API
182
182
``` java
183
183
public class ActorAffinitySchedulingStrategy implements SchedulingStrategy {
184
- private ActorAffinitySchedulingStrategy (List<ActorAffinityMatchExpression > expressions ) {
184
+ private ActorAffinitySchedulingStrategy (List<LabelMatchExpression > expressions ) {
185
185
}
186
186
187
- public class ActorAffinityMatchExpression {
187
+ public class LabelMatchExpression {
188
188
private String key;
189
- private ActorAffinityOperator operator;
189
+ private LabelMatchOperator operator;
190
190
private List<String > values;
191
191
private boolean isSoft;
192
192
193
193
/**
194
194
* Returns an affinity expression to indicate that the target actor is expected to be scheduled
195
195
* with the actors whose label meets one of the composed key and values. eg:
196
- * ActorAffinityMatchExpression .in("location", new ArrayList<>() {{ add("dc-1");}}, false).
196
+ * LabelMatchExpression .in("location", new ArrayList<>() {{ add("dc-1");}}, false).
197
197
*
198
198
* @param key The key of label.
199
199
* @param values A list of label values.
200
200
* @param isSoft If true, the actor will be scheduled even there's no matched actor.
201
- * @return ActorAffinityMatchExpression .
201
+ * @return LabelMatchExpression .
202
202
*/
203
- public static ActorAffinityMatchExpression in (String key , List<String > values , boolean isSoft ) {
204
- return new ActorAffinityMatchExpression (key, ActorAffinityOperator . IN , values, isSoft);
203
+ public static LabelMatchExpression in (String key , List<String > values , boolean isSoft ) {
204
+ return new LabelMatchExpression (key, LabelMatchOperator . IN , values, isSoft);
205
205
}
206
206
207
207
/**
208
208
* Returns an affinity expression to indicate that the target actor is not expected to be
209
209
* scheduled with the actors whose label meets one of the composed key and values. eg:
210
- * ActorAffinityMatchExpression .notIn( "location", new ArrayList<>() {{ add("dc-1");}}, false).
210
+ * LabelMatchExpression .notIn( "location", new ArrayList<>() {{ add("dc-1");}}, false).
211
211
*
212
212
* @param key The key of label.
213
213
* @param values A list of label values.
214
214
* @param isSoft If true, the actor will be scheduled even there's no matched actor.
215
- * @return ActorAffinityMatchExpression .
215
+ * @return LabelMatchExpression .
216
216
*/
217
- public static ActorAffinityMatchExpression notIn (
217
+ public static LabelMatchExpression notIn (
218
218
String key , List<String > values , boolean isSoft ) {
219
- return new ActorAffinityMatchExpression (key, ActorAffinityOperator . NOT_IN , values, isSoft);
219
+ return new LabelMatchExpression (key, LabelMatchOperator . NOT_IN , values, isSoft);
220
220
}
221
221
222
222
/**
223
223
* Returns an affinity expression to indicate that the target actor is expected to be scheduled
224
224
* with the actors whose labels exists the specified key. eg:
225
- * ActorAffinityMatchExpression .exists("location", false).
225
+ * LabelMatchExpression .exists("location", false).
226
226
*
227
227
* @param key The key of label.
228
228
* @param isSoft If true, the actor will be scheduled even there's no matched actor.
229
- * @return ActorAffinityMatchExpression .
229
+ * @return LabelMatchExpression .
230
230
*/
231
- public static ActorAffinityMatchExpression exists (String key , boolean isSoft ) {
232
- return new ActorAffinityMatchExpression (
233
- key, ActorAffinityOperator . EXISTS , new ArrayList<String > (), isSoft);
231
+ public static LabelMatchExpression exists (String key , boolean isSoft ) {
232
+ return new LabelMatchExpression (
233
+ key, LabelMatchOperator . EXISTS , new ArrayList<String > (), isSoft);
234
234
}
235
235
236
236
/**
237
237
* Returns an affinity expression to indicate that the target actor is not expected to be
238
238
* scheduled with the actors whose labels exists the specified key. eg:
239
- * ActorAffinityMatchExpression .doesNotExist("location", false).
239
+ * LabelMatchExpression .doesNotExist("location", false).
240
240
*
241
241
* @param key The key of label.
242
242
* @param isSoft If true, the actor will be scheduled even there's no matched actor.
243
- * @return ActorAffinityMatchExpression .
243
+ * @return LabelMatchExpression .
244
244
*/
245
- public static ActorAffinityMatchExpression doesNotExist (String key , boolean isSoft ) {
246
- return new ActorAffinityMatchExpression (
247
- key, ActorAffinityOperator . DOES_NOT_EXIST , new ArrayList<String > (), isSoft);
245
+ public static LabelMatchExpression doesNotExist (String key , boolean isSoft ) {
246
+ return new LabelMatchExpression (
247
+ key, LabelMatchOperator . DOES_NOT_EXIST , new ArrayList<String > (), isSoft);
248
248
}
249
249
250
250
}
@@ -282,7 +282,7 @@ locationValues.add("dc_1");
282
282
locationValues. add(" dc_2" );
283
283
ActorAffinitySchedulingStrategy schedulingStrategy =
284
284
new ActorAffinitySchedulingStrategy .Builder ()
285
- .addExpression(ActorAffinityMatchExpression . in(" location" , locationValues, false ))
285
+ .addExpression(LabelMatchExpression . in(" location" , locationValues, false ))
286
286
.build();
287
287
ActorHandle<Counter > actor2 =
288
288
Ray . actor(Counter :: new , 1). setSchedulingStrategy(schedulingStrategy). remote();
@@ -294,7 +294,7 @@ List<String> values = new ArrayList<>();
294
294
values.add("dc-1");
295
295
ActorAffinitySchedulingStrategy schedulingStrategyNotIn =
296
296
new ActorAffinitySchedulingStrategy.Builder()
297
- .addExpression(ActorAffinityMatchExpression .notIn("location", values, false))
297
+ .addExpression(LabelMatchExpression .notIn("location", values, false))
298
298
.build();
299
299
ActorHandle<Counter> actor3 =
300
300
Ray.actor(Counter::new, 1).setSchedulingStrategy(schedulingStrategyNotIn).remote();
@@ -304,7 +304,7 @@ ActorHandle<Counter> actor3 =
304
304
```java
305
305
ActorAffinitySchedulingStrategy schedulingStrategyExists =
306
306
new ActorAffinitySchedulingStrategy.Builder()
307
- .addExpression(ActorAffinityMatchExpression .exists("version", false))
307
+ .addExpression(LabelMatchExpression .exists("version", false))
308
308
.build();
309
309
ActorHandle<Counter> actor4 =
310
310
Ray.actor(Counter::new, 1).setSchedulingStrategy(schedulingStrategyExists).remote();
@@ -315,7 +315,7 @@ Assert.assertEquals(actor4.task(Counter::getValue).remote().get(10000), Integer.
315
315
```java
316
316
ActorAffinitySchedulingStrategy schedulingStrategyDoesNotExist =
317
317
new ActorAffinitySchedulingStrategy .Builder ()
318
- .addExpression(ActorAffinityMatchExpression . doesNotExist(" version" , false ))
318
+ .addExpression(LabelMatchExpression . doesNotExist(" version" , false ))
319
319
.build();
320
320
ActorHandle<Counter > actor5 =
321
321
Ray . actor(Counter :: new , 1). setSchedulingStrategy(schedulingStrategyDoesNotExist). remote();
@@ -325,8 +325,8 @@ ActorHandle<Counter> actor5 =
325
325
```java
326
326
ActorAffinitySchedulingStrategy schedulingStrategy =
327
327
new ActorAffinitySchedulingStrategy .Builder ()
328
- .addExpression(ActorAffinityMatchExpression . doesNotExist(" version" , false ))
329
- .addExpression(ActorAffinityMatchExpression . Exists(" location" , false ))
328
+ .addExpression(LabelMatchExpression . doesNotExist(" version" , false ))
329
+ .addExpression(LabelMatchExpression . exists(" location" , false ))
330
330
.build();
331
331
ActorHandle<Counter > actor6 =
332
332
Ray . actor(Counter :: new , 1). setSchedulingStrategy(schedulingStrategy). remote();
@@ -412,10 +412,10 @@ message ResourcesData {
412
412
// heartbeat enabled.
413
413
bool resources_available_changed = 3 ;
414
414
415
- // Map<key , Map<value, reference_count>> Actors scheduled to this node and actor labels information
416
- repeat Map<string, Map<string, int> > actor_labels = 15
415
+ // Map<label_type , Map<namespace, Map<label_key, label_value>>> Actors/Tasks/Nodes labels information
416
+ repeat Map<string, Map<string, Map<string, string> > > labels = 15
417
417
// Whether the labels of this node have changed.
418
- bool actor_labels_changed = 16 ,
418
+ bool labels_changed = 16 ,
419
419
}
420
420
421
421
@@ -465,7 +465,7 @@ Actor scheduling flowchart:
465
465
![Actor scheduling flowchart](https://user-images.githubusercontent.com/11072802/202128385-f72609c5-308d-4210-84ff-bf3ba6df381c.png)
466
466
467
467
Node Resources synchronization mechanism:
468
- ! [Node Resources synchronization mechanism](https: // user-images.githubusercontent.com/11072802/202128406-b4745e6e-3565-41a2-bfe3-78843379bf09 .png)
468
+ ![Node Resources synchronization mechanism](https://user-images.githubusercontent.com/11072802/203783157-fad67f25-b046-49ac-b201-b54942073823.png)
469
469
470
470
4. Scheduling optimization through ActorLabels
471
471
Now any node raylet has ActorLabels information for all nodes.
@@ -491,16 +491,6 @@ class GcsLabelManager {
491
491
}
492
492
```
493
493
494
- < b> VS . putting Labels into the custom resource solution < b>
495
- < b> Advantages : < b>
496
- 1. Compared with the scheme of putting Labels in the custom resource. This scheme can also reuse the resource synchronization mechanism. Then it won' t destroy the concept of coustrom resouce.
497
- 2. The Label index table of all nodes can be constructed from the ActorLabels information of each node. If you use Custom Resource, you can' t build.
498
- 3. If the Custom Resouces scheme is used, the accuracy of custom resouces scheduling will be affected. The current scheme is completely independent of existing scheduling policies and resources and will not affect them. The code is also more concise.
499
-
500
-
501
- < b> DisAdvantages : < b>
502
- 1. The interface for resource reporting and updating needs to be adapted to ActorLabels in ResouceData.
503
-
504
494
<b>Issue</b>
505
495
1. Because resource synchronization is inevitably delayed under raylet scheduling, scheduling may be inaccurate when actor affinity uses soft semantics.
506
496
@@ -523,8 +513,40 @@ This solution is to learn the PodAffinity/NodeAffinity features of K8s。
523
513
https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#affinity-and-anti-affinity
524
514
525
515
### What's the alternative to achieve the same goal?
526
- 1、Ray
527
- Now ray placement group can achieve the same goal. But PG is too heavy and complicated to be user friendly
516
+ <b>Option 2: putting Labels into the custom resource solution</b>
517
+ ```
518
+ class ResourceRequest {
519
+ absl:: flat_hash_map< ResourceID , FixedPoint > resources_;
520
+ }
521
+
522
+ Add Actor / Task / Node labels to resources_ as custom resources.
523
+ eg:
524
+ {
525
+ " CPU" : 16 ,
526
+ " memory" : xx,
527
+ " custom_resources" : xx,
528
+ " actor_labels_key@value" : 1 ,
529
+ " task_labels_key@value" : 1 ,
530
+ " node_labels_key@value" : 1 ,
531
+ }
532
+ ```
533
+ If you put labels into custom_resources, you need to do the following adaptation:
534
+ 1. Compared with custom_resources, labels need to add a specific prefix to distinguish them from custom_resources.
535
+ 2. The key and value of Labels need to be concatenated with special characters (@ ).
536
+ 3. When using Labels to build a Labels index table, you need to parse the resources key.
537
+
538
+ <b>Disadvantages:</b>
539
+ 1. Unlike CPU and other resources, which are numeric, labels are not numeric values. Compared with the scheme above, this would break the concept of custom resources.
539
+ 2. Actors and Tasks are isolated by namespace. It is difficult to isolate by namespace if labels are added as custom resources.
540
+ 3. The Label index table of all nodes can be constructed from the ActorLabels information of each node. If you use custom resources, this requires parsing the resource key and doing a lot of string splitting, which costs performance.
541
+ 4. If a custom resource name happens to be the same as the concatenated label string, it will affect the correctness of scheduling.
543
+
544
+
545
+ <b>Advantages:</b>
546
+ 1. The interface for resource reporting and updating does not need to be modified.
547
+
548
+
549
+
528
550
## Compatibility, Deprecation, and Migration Plan
529
551
530
552
## Test Plan and Acceptance Criteria
@@ -536,7 +558,7 @@ All APIs will be fully unit tested. All specifications in this documentation wil
536
558
Later, if necessary, you can extend the semantics of "OR" by adding "is_or_semantics" to ActorAffinitySchedulingStrategy.
537
559
```
538
560
class ActorAffinitySchedulingStrategy :
539
- def __init__(self, match_expressions: List[ActorAffinityMatchExpression ], is_or_semantics = false):
561
+ def __init__(self, match_expressions: List[LabelMatchExpression ], is_or_semantics = False):
540
562
self.match_expressions = match_expressions
541
563
self.is_or_semantics = is_or_semantics
542
564
```
0 commit comments