modelplaneai · negz · Jun 10, 2026 · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026
diff --git a/apis/eksclusters/definition.yaml b/apis/eksclusters/definition.yaml
@@ -43,10 +43,12 @@ spec:
                 maxLength: 32
               kubernetesVersion:
                 type: string
-                default: "1.31"
+                default: "1.36"
                 description: >-
                   EKS cluster Kubernetes version. Must be a version EKS
-                  currently supports.
+                  currently supports. Defaults to a version where Dynamic
+                  Resource Allocation (how GPUs bind to pods) is generally
+                  available.
                 minLength: 1
                 maxLength: 16
               networking:

diff --git a/apis/inferenceclasses/definition.yaml b/apis/inferenceclasses/definition.yaml
@@ -21,7 +21,7 @@ spec:
         properties:
           spec:
             type: object
-            required: [resources]
+            required: [devices]
             properties:
               description:
                 type: string
@@ -50,6 +50,12 @@ spec:
                         minimum: 10
                       accelerator:
                         type: object
+                        description: >-
+                          GPU accelerator to attach when provisioning the node
+                          pool. Provisioning input only: the scheduler matches
+                          against spec.devices, not this block, so count here is
+                          the GCP machine's GPU count and need not be restated in
+                          devices.
                         required: [type, count]
                         properties:
                           type:
@@ -81,6 +87,10 @@ spec:
                         minimum: 10
                       accelerator:
                         type: object
+                        description: >-
+                          GPU accelerator to attach when provisioning the node
+                          group. Provisioning input only: the scheduler matches
+                          against spec.devices, not this block.
                         required: [type, count]
                         properties:
                           type:
@@ -95,26 +105,117 @@ spec:
                             type: integer
                             minimum: 1
                             maximum: 16
-              resources:
-                type: object
+              devices:
+                type: array
                 description: >-
-                  Hardware resources a node of this class exposes.
-                required: [gpu]
-                properties:
-                  gpu:
-                    type: object
-                    required: [count, memory]
-                    properties:
-                      count:
-                        type: integer
-                        description: GPUs per node.
-                        minimum: 1
-                        maximum: 16
-                      memory:
-                        type: string
-                        description: Per-GPU VRAM (e.g. "24Gi", "80Gi").
-                        minLength: 1
-                        maxLength: 16
+                  Devices a node of this class has, following DRA's model
+                  (KEP-4381). Each entry describes one kind of device with a
+                  count, mirroring what a DRA driver publishes in a
+                  ResourceSlice (one entry per kind rather than per physical
+                  device). ModelDeployment.nodeSelector matches against these,
+                  and claim: DRA devices are emitted as requests in a DRA
+                  ResourceClaim when scheduling a worker to this pool.
+
+                  A scheduled worker pod is pinned to its pool with a
+                  nodeSelector on the modelplane.ai/pool node label.
+                  Modelplane-provisioned (EKS, GKE) pools carry this label
+                  automatically. On a BYO (Existing) cluster Modelplane doesn't
+                  provision the nodes, so the operator must label the pool's
+                  nodes modelplane.ai/pool=<nodePools[].name> themselves, or
+                  worker pods for this class will stay Pending.
+                minItems: 1
+                maxItems: 16
+                x-kubernetes-list-type: map
+                x-kubernetes-list-map-keys: [name]
+                items:
+                  type: object
+                  required: [name, driver]
+                  properties:
+                    name:
+                      type: string
+                      description: >-
+                        Name of this device within the class (e.g. gpu, nic).
+                      minLength: 1
+                      maxLength: 63
+                    claim:
+                      type: string
+                      description: >-
+                        How Modelplane treats this device. DRA emits it as a
+                        request in a ResourceClaim, so the Kubernetes scheduler
+                        allocates a matching device when the pod is scheduled; use
+                        it for hardware a real DRA driver exposes. Synthetic
+                        describes the device for fleet scheduling only and never
+                        claims it; use it for hardware that matters for placement
+                        but has no DRA driver yet, like an InfiniBand fabric.
+                      enum: [DRA, Synthetic]
+                      default: DRA
+                    driver:
+                      type: string
+                      description: >-
+                        DRA driver that owns this device (e.g. gpu.nvidia.com).
+                        Becomes the attribute/capacity domain a nodeSelector
+                        reads as device.attributes["<driver>"].<name>.
+                      minLength: 1
+                      maxLength: 253
+                    deviceClassName:
+                      type: string
+                      description: >-
+                        Name of the cluster-scoped DRA DeviceClass to claim this
+                        device through. Required for claim: DRA devices; the DRA
+                        driver install creates the DeviceClass (e.g.
+                        gpu.nvidia.com). Ignored for Synthetic devices.
+                      minLength: 1
+                      maxLength: 253
+                    count:
+                      type: integer
+                      description: How many of this device a node has.
+                      default: 1
+                      minimum: 1
+                      maximum: 64
+                    attributes:
+                      type: object
+                      description: >-
+                        DRA-style typed attributes for this device. Keys are
+                        bare names (e.g. architecture); the domain comes from the
+                        device's driver. Each value sets exactly one typed field.
+                      maxProperties: 32
+                      additionalProperties:
+                        type: object
+                        properties:
+                          string:
+                            type: string
+                            maxLength: 253
+                          version:
+                            type: string
+                            description: Semantic version (e.g. "9.0.0").
+                            maxLength: 32
+                          bool:
+                            type: boolean
+                          int:
+                            type: integer
+                            format: int64
+                        x-kubernetes-validations:
+                        - rule: "[has(self.string), has(self.version), has(self.bool), has(self.int)].filter(x, x).size() == 1"
+                          message: "exactly one of string, version, bool, int must be set"
+                    capacity:
+                      type: object
+                      description: >-
+                        DRA-style capacity quantities for this device. Keys are
+                        bare names (e.g. memory); values are Kubernetes
+                        Quantities.
+                      maxProperties: 32
+                      additionalProperties:
+                        type: object
+                        required: [value]
+                        properties:
+                          value:
+                            type: string
+                            description: A Kubernetes Quantity (e.g. "141Gi").
+                            minLength: 1
+                            maxLength: 32
+                  x-kubernetes-validations:  # claim defaults to DRA, so only an explicit Synthetic waives deviceClassName
+                  - rule: "(has(self.claim) && self.claim == 'Synthetic') || has(self.deviceClassName)"
+                    message: "deviceClassName is required when claim is DRA"
           status:
             type: object
             properties:

diff --git a/apis/inferenceclusters/definition.yaml b/apis/inferenceclusters/definition.yaml
@@ -142,7 +142,11 @@ spec:
                         maxLength: 32
                       kubernetesVersion:
                         type: string
-                        default: "1.31"
+                        default: "1.36"
+                        description: >-
+                          EKS cluster Kubernetes version. Must be a version EKS
+                          currently supports. Defaults to one where Dynamic Resource
+                          Allocation (how GPUs bind to pods) is generally available.
                       cache:
                         type: object
                         description: >-
@@ -224,29 +228,83 @@ spec:
                     description: >-
                       External IP of the inference gateway on the remote cluster.
                       Used by ModelDeployment for unified endpoint routing.
-              capacity:
-                type: object
+              gpuPools:
+                type: array
                 description: >-
-                  Declared capacity derived from the referenced classes
-                  and the per-pool node counts.
-                properties:
-                  gpuPools:
-                    type: array
-                    items:
-                      type: object
-                      properties:
-                        acceleratorType:
-                          type: string
-                        memory:
-                          type: string
-                          description: Per-GPU VRAM (e.g. "24Gi").
-                        countPerNode:
-                          type: integer
-                        nodes:
-                          type: integer
-                          description: >-
-                            Number of nodes in this pool. Derived from
-                            maxNodeCount (if autoscaling) or nodeCount.
+                  Schedulable GPU node pools on this cluster, derived from the
+                  referenced classes and the per-pool node counts.
+                  ModelDeployment scheduling matches against these.
+                maxItems: 8
+                x-kubernetes-list-type: map
+                x-kubernetes-list-map-keys: [name]
+                items:
+                  type: object
+                  required: [name]
+                  properties:
+                    name:
+                      type: string
+                      description: >-
+                        Node pool name, matching spec.nodePools[].name.
+                        Used to pin a ModelReplica to a specific pool via
+                        spec.nodePoolName.
+                    nodes:
+                      type: integer
+                      description: >-
+                        Number of nodes in this pool. Derived from
+                        maxNodeCount (if autoscaling) or nodeCount.
+                    devices:
+                      type: array
+                      description: >-
+                        Devices copied from the pool's InferenceClass.
+                        ModelDeployment.nodeSelector matches against these.
+                      maxItems: 16
+                      x-kubernetes-list-type: map
+                      x-kubernetes-list-map-keys: [name]
+                      items:
+                        type: object
+                        required: [name, driver]
+                        properties:
+                          name:
+                            type: string
+                            maxLength: 63
+                          claim:
+                            type: string
+                            enum: [DRA, Synthetic]
+                          driver:
+                            type: string
+                            maxLength: 253
+                          deviceClassName:
+                            type: string
+                            maxLength: 253
+                          count:
+                            type: integer
+                          attributes:
+                            type: object
+                            maxProperties: 32
+                            additionalProperties:
+                              type: object
+                              properties:
+                                string:
+                                  type: string
+                                  maxLength: 253
+                                version:
+                                  type: string
+                                  maxLength: 32
+                                bool:
+                                  type: boolean
+                                int:
+                                  type: integer
+                                  format: int64
+                          capacity:
+                            type: object
+                            maxProperties: 32
+                            additionalProperties:
+                              type: object
+                              required: [value]
+                              properties:
+                                value:
+                                  type: string
+                                  maxLength: 32
               namespace:
                 type: string
                 description: >-

diff --git a/apis/modeldeployments/definition.yaml b/apis/modeldeployments/definition.yaml
@@ -25,7 +25,7 @@ spec:
         properties:
           spec:
             type: object
-            required: [replicas, workers]
+            required: [replicas, workers, nodeSelector]
             properties:
               replicas:
                 type: integer
@@ -43,6 +43,85 @@ spec:
                   matchLabels:
                     type: object
                     x-kubernetes-preserve-unknown-fields: true
+              nodeSelector:
+                type: object
+                description: >-
+                  Node-level matching, a list of device requests mirroring a
+                  DRA ResourceClaim. The scheduler matches each request against a
+                  candidate pool's InferenceClass devices (surfaced on
+                  InferenceCluster status.gpuPools) and pins the replica to a
+                  pool that satisfies every request. claim: DRA requests also
+                  become DeviceRequests in the ResourceClaim the serving pods
+                  bind GPUs through. Required: GPUs bind only via DRA, so a
+                  deployment must declare the devices its model needs. At least
+                  one request must resolve to a claimable (claim: DRA) device;
+                  the serving workload binds its GPUs through the resulting
+                  ResourceClaim. Synthetic devices refine placement but are never
+                  claimed, so a nodeSelector that matches only synthetic devices
+                  leaves the workload nothing to claim - the scheduler treats
+                  such a pool as ineligible and the deployment reports
+                  InsufficientCapacity.
+                required: [devices]
+                properties:
+                  devices:
+                    type: array
+                    description: >-
+                      Device requests. A pool matches a request when it has a
+                      device whose count covers the request and whose driver,
+                      attributes, and capacity satisfy every selector.
+                    minItems: 1
+                    maxItems: 16
+                    x-kubernetes-list-type: map
+                    x-kubernetes-list-map-keys: [name]
+                    items:
+                      type: object
+                      required: [name, selectors]
+                      properties:
+                        name:
+                          type: string
+                          description: >-
+                            Name of this request. Mirrors a DRA DeviceRequest
+                            name; carried through to the ResourceClaim.
+                          minLength: 1
+                          maxLength: 63
+                        count:
+                          type: integer
+                          description: >-
+                            How many matching devices a node must have. For a GPU
+                            request this is the per-node GPU count (matches the
+                            worker topology's GPUs per node).
+                          default: 1
+                          minimum: 1
+                          maximum: 64
+                        selectors:
+                          type: array
+                          description: >-
+                            Selectors a device must satisfy, all ANDed. Each is a
+                            one-of; today only cel is supported.
+                          minItems: 1
+                          maxItems: 8
+                          x-kubernetes-list-type: atomic
+                          items:
+                            type: object
+                            # A selector must carry at least one selector kind
+                            # (today only cel). Without this, an empty {}
+                            # selector would match every device; because
+                            # nodeSelector is the only path to a GPU, that would
+                            # silently claim an arbitrary one.
+                            minProperties: 1
+                            properties:
+                              cel:
+                                type: string
+                                description: >-
+                                  A DRA CEL expression evaluated against one
+                                  device. Reads device.driver,
+                                  device.attributes["<driver>"].<name> (typed),
+                                  and device.capacity["<driver>"].<name> (a
+                                  Quantity), with quantity() and semver() helpers,
+                                  e.g.
+                                  device.capacity["gpu.nvidia.com"].memory.compareTo(quantity("141Gi")) >= 0.
+                                minLength: 1
+                                maxLength: 10240
               modelCacheRef:
                 type: object
                 required: [name]