2 changes: 1 addition & 1 deletion cmd/runhcs/container.go
@@ -153,7 +153,7 @@ func launchShim(cmd, pidFile, logFile string, args []string, data interface{}) (
}

fullargs = append(fullargs, "--log-format", logFormat)
- if logrus.GetLevel() == logrus.DebugLevel {
+ if logrus.IsLevelEnabled(logrus.DebugLevel) {
fullargs = append(fullargs, "--debug")
}
}
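A note on the semantics, since the same pattern recurs in the bridge and netns changes below: `logrus.IsLevelEnabled(lvl)` reports whether messages at `lvl` would actually be emitted, i.e. it is true for the configured level and anything more verbose, while the old comparison checks encoded that ordering by hand. A minimal sketch of the difference:

```go
package main

import (
	"fmt"

	"github.com/sirupsen/logrus"
)

func main() {
	// At trace verbosity, debug output is wanted too.
	logrus.SetLevel(logrus.TraceLevel)

	// The old equality check misses this case: trace != debug,
	// so --debug would not have been forwarded to the shim.
	fmt.Println(logrus.GetLevel() == logrus.DebugLevel) // false

	// IsLevelEnabled captures the intent: debug messages would be logged.
	fmt.Println(logrus.IsLevelEnabled(logrus.DebugLevel)) // true
}
```

The bridge changes below apply the same idea from the other direction: `GetLevel() > logrus.DebugLevel` is a roundabout way of asking "is trace enabled?", which `IsLevelEnabled(logrus.TraceLevel)` states directly.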
39 changes: 22 additions & 17 deletions internal/annotations/annotations.go
@@ -5,8 +5,7 @@
// Do not rely on these annotations to customize production workload behavior.
package annotations

- // uVM specific annotations
-
+ // uVM annotations.
const (
// UVMHyperVSocketConfigPrefix is the prefix of an annotation to map a [hyper-v socket] service GUID
// to a JSON-encoded string of its [configuration].
@@ -30,24 +29,15 @@ const (
// [configuration]: https://learn.microsoft.com/en-us/virtualization/api/hcs/schemareference#HvSocketServiceConfig
UVMHyperVSocketConfigPrefix = "io.microsoft.virtualmachine.hv-socket.service-table."

- // AdditionalRegistryValues specifies additional registry keys and their values to set in the WCOW UVM.
- // The format is a JSON-encoded string of an array containing [HCS RegistryValue] objects.
- //
- // Registry values will be available under `HKEY_LOCAL_MACHINE` root key.
- //
- // For example:
- //
- // "[{\"Key\": {\"Hive\": \"System\", \"Name\": \"registry\\key\\path"}, \"Name\": \"ValueName\", \"Type\": \"String\", \"StringValue\": \"value\"}]"
- //
- // [HCS RegistryValue]: https://learn.microsoft.com/en-us/virtualization/api/hcs/schemareference#registryvalue
- AdditionalRegistryValues = "io.microsoft.virtualmachine.wcow.additional-reg-keys"

- // ExtraVSockPorts adds additional ports to the list of ports that the UVM is allowed to use.
- ExtraVSockPorts = "io.microsoft.virtualmachine.lcow.extra-vsock-ports"

// UVMConsolePipe is the name of the named pipe that the UVM console is connected to. This works only for non-SNP
// scenarios; for SNP, use a debugger.
UVMConsolePipe = "io.microsoft.virtualmachine.console.pipe"
)

+ // LCOW uVM annotations.
+ const (
+ // ExtraVSockPorts adds additional ports to the list of ports that the UVM is allowed to use.
+ ExtraVSockPorts = "io.microsoft.virtualmachine.lcow.extra-vsock-ports"

// NetworkingPolicyBasedRouting toggles on the ability to set policy based routing in the
// guest for LCOW.
@@ -57,3 +47,18 @@
// LCOW scenarios. Ideally, this annotation should be removed if no issues are found.
NetworkingPolicyBasedRouting = "io.microsoft.virtualmachine.lcow.network.policybasedrouting"
)

+ // WCOW uVM annotations.
+ const (
+ // AdditionalRegistryValues specifies additional registry keys and their values to set in the WCOW UVM.
+ // The format is a JSON-encoded string of an array containing [HCS RegistryValue] objects.
+ //
+ // Registry values will be available under the `HKEY_LOCAL_MACHINE` root key.
+ //
+ // For example:
+ //
+ // "[{\"Key\": {\"Hive\": \"System\", \"Name\": \"registry\\key\\path\"}, \"Name\": \"ValueName\", \"Type\": \"String\", \"StringValue\": \"value\"}]"
+ //
+ // [HCS RegistryValue]: https://learn.microsoft.com/en-us/virtualization/api/hcs/schemareference#registryvalue
+ AdditionalRegistryValues = "io.microsoft.virtualmachine.wcow.additional-reg-keys"
+ )
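Rather than hand-writing the escaped JSON above, a caller can marshal a value shaped like the HCS `RegistryValue` schema. A minimal sketch, using local illustrative types rather than the real `hcsschema` definitions:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Illustrative stand-ins for the documented HCS RegistryValue shape;
// the real definitions live in the HCS schema.
type registryKey struct {
	Hive string `json:"Hive"`
	Name string `json:"Name"`
}

type registryValue struct {
	Key         registryKey `json:"Key"`
	Name        string      `json:"Name"`
	Type        string      `json:"Type"`
	StringValue string      `json:"StringValue"`
}

func main() {
	vals := []registryValue{{
		Key:         registryKey{Hive: "System", Name: `registry\key\path`},
		Name:        "ValueName",
		Type:        "String",
		StringValue: "value",
	}}

	b, err := json.Marshal(vals)
	if err != nil {
		panic(err)
	}
	// This string is the value to set for the
	// "io.microsoft.virtualmachine.wcow.additional-reg-keys" annotation.
	fmt.Println(string(b))
}
```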
2 changes: 1 addition & 1 deletion internal/gcs/bridge.go
@@ -404,7 +404,7 @@ func (brdg *bridge) writeMessage(buf *bytes.Buffer, enc *json.Encoder, typ msgTy
// Update the message header with the size.
binary.LittleEndian.PutUint32(buf.Bytes()[hdrOffSize:], uint32(buf.Len()))

- if brdg.log.Logger.GetLevel() > logrus.DebugLevel {
+ if brdg.log.Logger.IsLevelEnabled(logrus.TraceLevel) {
b := buf.Bytes()[hdrSize:]
switch typ {
// container environment vars are in rpCreate for linux; rpcExecuteProcess for windows
2 changes: 1 addition & 1 deletion internal/guest/bridge/bridge.go
@@ -310,7 +310,7 @@ func (b *Bridge) ListenAndServe(bridgeIn io.ReadCloser, bridgeOut io.WriteCloser
trace.StringAttribute("cid", base.ContainerID))

entry := log.G(ctx)
- if entry.Logger.GetLevel() > logrus.DebugLevel {
+ if entry.Logger.IsLevelEnabled(logrus.TraceLevel) {
var err error
var msgBytes []byte
switch header.Type {
2 changes: 1 addition & 1 deletion internal/guest/network/netns.go
@@ -170,7 +170,7 @@ func NetNSConfig(ctx context.Context, ifStr string, nsPid int, adapter *guestres
}

// Add some debug logging
- if entry.Logger.GetLevel() >= logrus.DebugLevel {
+ if entry.Logger.IsLevelEnabled(logrus.DebugLevel) {
curNS, _ := netns.Get()
// Refresh link attributes/state
link, _ = netlink.LinkByIndex(link.Attrs().Index)
5 changes: 3 additions & 2 deletions internal/guest/runtime/hcsv2/nvidia_utils.go
@@ -23,8 +23,9 @@ import (
const nvidiaDebugFilePath = "nvidia-container.log"
const nvidiaToolBinary = "nvidia-container-cli"

- // described here: https://github.com/opencontainers/runtime-spec/blob/39c287c415bf86fb5b7506528d471db5405f8ca8/config.md#posix-platform-hooks
- // addNvidiaDeviceHook builds the arguments for nvidia-container-cli and creates the prestart hook
+ // addNvidiaDeviceHook builds the arguments for nvidia-container-cli and creates the createRuntime [OCI hook].
+ //
+ // [OCI hook]: https://github.com/opencontainers/runtime-spec/blob/39c287c415bf86fb5b7506528d471db5405f8ca8/config.md#posix-platform-hooks
func addNvidiaDeviceHook(ctx context.Context, spec *oci.Spec, ociBundlePath string) error {
genericHookBinary := "generichook"
genericHookPath, err := exec.LookPath(genericHookBinary)
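For orientation, a createRuntime hook is simply an entry appended to the spec's `Hooks`; per the linked spec, createRuntime hooks run during the create operation, after the runtime environment is set up but before `pivot_root`. A rough sketch of the pattern (the helper name and arguments are placeholders, not the actual generichook wiring):

```go
import (
	oci "github.com/opencontainers/runtime-spec/specs-go"
)

// addCreateRuntimeHook appends a createRuntime hook to an OCI spec.
// Hypothetical helper for illustration only.
func addCreateRuntimeHook(spec *oci.Spec, path string, args, env []string) {
	if spec.Hooks == nil {
		spec.Hooks = &oci.Hooks{}
	}
	spec.Hooks.CreateRuntime = append(spec.Hooks.CreateRuntime, oci.Hook{
		Path: path, // must be an absolute path in the runtime namespace
		Args: args,
		Env:  env,
	})
}
```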
19 changes: 19 additions & 0 deletions internal/guest/runtime/hcsv2/workload_container.go
@@ -220,6 +220,25 @@ func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci.
if err := addNvidiaDeviceHook(ctx, spec, ociBundlePath); err != nil {
return err
}

+ // The NVIDIA device hook `nvidia-container-cli` adds `rw` permissions for the
+ // GPU and ctl nodes (`c 195:*`) to the devices allow list, but CUDA apparently also
+ // needs `rwm` permissions for other device nodes (e.g., `c 235`).
+ //
+ // Grant `rwm` to all character devices (`c *:* rwm`) to avoid hard-coding exact node
+ // numbers, which are unknown before the driver runs (GPU devices are presented as I2C
+ // devices initially) or could change with driver implementation.
+ //
+ // Note: runc already grants mknod, `c *:* m`, so this really adds `rw` permissions for
+ // all character devices:
+ // https://github.com/opencontainers/runc/blob/6bae6cad4759a5b3537d550f43ea37d51c6b518a/libcontainer/specconv/spec_linux.go#L205-L222
+ spec.Linux.Resources.Devices = append(spec.Linux.Resources.Devices,
+ oci.LinuxDeviceCgroup{
+ Allow: true,
+ Type: "c",
+ Access: "rwm",
+ },
+ )
}
// add other assigned devices to the spec
if err := specGuest.AddAssignedDevice(ctx, spec); err != nil {
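To make the cgroup effect concrete: the device allow-list is evaluated on top of a default deny, so the appended rule is purely additive. A rough sketch of the rule set a container ends up with under this change (runc defaults paraphrased from the linked specconv code):

```go
// Illustrative only; types from github.com/opencontainers/runtime-spec/specs-go.
rules := []oci.LinuxDeviceCgroup{
	{Allow: false, Type: "a", Access: "rwm"}, // implicit default: deny everything
	{Allow: true, Type: "c", Access: "m"},    // runc default: mknod any char device
	{Allow: true, Type: "b", Access: "m"},    // runc default: mknod any block device
	{Allow: true, Type: "c", Access: "rwm"},  // added above: read/write any char device
}
_ = rules
```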
16 changes: 14 additions & 2 deletions internal/guest/spec/spec_devices.go
@@ -10,11 +10,13 @@ import (
"strings"
"time"

"github.com/Microsoft/hcsshim/internal/guest/storage/pci"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/opencontainers/runc/libcontainer/devices"
oci "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"

"github.com/Microsoft/hcsshim/internal/guest/storage/pci"
"github.com/Microsoft/hcsshim/internal/log"
)

const (
@@ -23,13 +25,17 @@
charType = "char"
blockType = "block"

+ // TODO: consolidate with `internal\uvm\virtual_device.go` and use in both locations
+ gpuDeviceIDType = "gpu"
vpciDeviceIDTypeLegacy = "vpci"
vpciDeviceIDType = "vpci-instance-id"
)

// AddAssignedDevice goes through the assigned devices that have been enumerated
// on the spec and updates the spec so that the correct device nodes can be mounted
// into the resulting container by the runtime.
+ //
+ // GPU devices are skipped, since they are handled in [addNvidiaDeviceHook].
func AddAssignedDevice(ctx context.Context, spec *oci.Spec) error {
// Add an explicit timeout before we try to find the dev nodes so we
// aren't waiting forever.
@@ -52,6 +58,12 @@ func AddAssignedDevice(ctx context.Context, spec *oci.Spec) error {
for _, dev := range devs {
AddLinuxDeviceToSpec(ctx, dev, spec, true)
}
+ case gpuDeviceIDType:
+ default:
+ log.G(ctx).WithFields(logrus.Fields{
+ "type": d.IDType,
+ "id": d.ID,
+ }).Warn("unknown device type")
}
}

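One Go subtlety in the switch above: an empty `case` is a deliberate no-op, not a fallthrough, so `case gpuDeviceIDType:` quietly skips GPU devices rather than falling into the `default` warning. A tiny sketch of the idiom, reusing the constants defined above:

```go
switch idType {
case gpuDeviceIDType:
	// Intentionally empty: GPUs are set up by the NVIDIA device hook,
	// and matching them here keeps them out of the default branch.
	// Go switch cases never fall through implicitly.
case vpciDeviceIDTypeLegacy, vpciDeviceIDType:
	// add the enumerated device nodes to the container spec
default:
	// unknown device type: log a warning and continue
}
```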
26 changes: 13 additions & 13 deletions internal/hcsoci/devices.go
@@ -174,21 +174,21 @@ func handleAssignedDevicesLCOW(

// assign device into UVM and create corresponding spec windows devices
for _, d := range specDevs {
- if uvm.IsValidDeviceType(d.IDType) {
- pciID, index := devices.GetDeviceInfoFromPath(d.ID)
- vpci, err := vm.AssignDevice(ctx, pciID, index, "")
- if err != nil {
- return resultDevs, closers, errors.Wrapf(err, "failed to assign device %s, function %d to pod %s", pciID, index, vm.ID())
- }
- closers = append(closers, vpci)
-
- // update device ID on the spec to the assigned device's resulting vmbus guid so gcs knows which devices to
- // map into the container
- d.ID = vpci.VMBusGUID
- resultDevs = append(resultDevs, d)
- } else {
+ if !uvm.IsValidDeviceType(d.IDType) {
return resultDevs, closers, errors.Errorf("specified device %s has unsupported type %s", d.ID, d.IDType)
}

+ pciID, index := devices.GetDeviceInfoFromPath(d.ID)
+ vpci, err := vm.AssignDevice(ctx, pciID, index, "")
+ if err != nil {
+ return resultDevs, closers, errors.Wrapf(err, "failed to assign device %s, function %d to pod %s", pciID, index, vm.ID())
+ }
+ closers = append(closers, vpci)
+
+ // update device ID on the spec to the assigned device's resulting vmbus guid so gcs knows which devices to
+ // map into the container
+ d.ID = vpci.VMBusGUID
+ resultDevs = append(resultDevs, d)
}

return resultDevs, closers, nil
2 changes: 1 addition & 1 deletion internal/oci/annotations.go
@@ -34,7 +34,7 @@ func ProcessAnnotations(ctx context.Context, s *specs.Spec) error {

// expand annotations
var errs []error
- for key, exps := range annotations.AnnotationExpansions {
+ for key, exps := range annotations.AnnotationExpansionMap() {
// check if annotation is present
if val, ok := s.Annotations[key]; ok {
// ideally, some normalization would occur here (ie, "True" -> "true")
3 changes: 2 additions & 1 deletion internal/oci/annotations_test.go
@@ -186,7 +186,8 @@ func TestProccessAnnotations_Expansion(t *testing.T) {
subtest.Fatalf("could not update spec from options: %v", err)
}

- for _, k := range annotations.AnnotationExpansions[annotations.DisableUnsafeOperations] {
+ ae := annotations.AnnotationExpansionMap()
+ for _, k := range ae[annotations.DisableUnsafeOperations] {
if vv := tt.spec.Annotations[k]; vv != v {
subtest.Fatalf("annotation %q was incorrectly expanded to %q, expected %q", k, vv, v)
}
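The move from the exported `AnnotationExpansions` variable to an `AnnotationExpansionMap()` call suggests the package now hands callers a copy instead of the shared map, so mutations can't leak into package state. A plausible sketch (the unexported backing map is hypothetical):

```go
// annotationExpansions is the hypothetical unexported backing map.
var annotationExpansions = map[string][]string{
	// annotation -> annotations it expands to
}

// AnnotationExpansionMap returns a copy of the expansion map, so callers
// can iterate over or modify the result without mutating package state.
func AnnotationExpansionMap() map[string][]string {
	m := make(map[string][]string, len(annotationExpansions))
	for k, v := range annotationExpansions {
		// Copy each slice as well; sharing the backing array would let
		// an append by the caller clobber the original values.
		m[k] = append([]string(nil), v...)
	}
	return m
}
```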
5 changes: 4 additions & 1 deletion internal/oci/uvm.go
@@ -268,8 +268,10 @@ func specToUVMCreateOptionsCommon(ctx context.Context, opts *uvm.Options, s *spe
opts.ProcessDumpLocation = ParseAnnotationsString(s.Annotations, annotations.ContainerProcessDumpLocation, opts.ProcessDumpLocation)
opts.NoWritableFileShares = ParseAnnotationsBool(ctx, s.Annotations, annotations.DisableWritableFileShares, opts.NoWritableFileShares)
opts.DumpDirectoryPath = ParseAnnotationsString(s.Annotations, annotations.DumpDirectoryPath, opts.DumpDirectoryPath)

+ // NUMA settings
opts.MaxProcessorsPerNumaNode = ParseAnnotationsUint32(ctx, s.Annotations, annotations.NumaMaximumProcessorsPerNode, opts.MaxProcessorsPerNumaNode)
- opts.MaxSizePerNode = ParseAnnotationsUint64(ctx, s.Annotations, annotations.NumaMaximumSizePerNode, opts.MaxSizePerNode)
+ opts.MaxMemorySizePerNumaNode = ParseAnnotationsUint64(ctx, s.Annotations, annotations.NumaMaximumMemorySizePerNode, opts.MaxMemorySizePerNumaNode)
opts.PreferredPhysicalNumaNodes = ParseAnnotationCommaSeparatedUint32(ctx, s.Annotations, annotations.NumaPreferredPhysicalNodes,
opts.PreferredPhysicalNumaNodes)
opts.NumaMappedPhysicalNodes = ParseAnnotationCommaSeparatedUint32(ctx, s.Annotations, annotations.NumaMappedPhysicalNodes,
@@ -278,6 +280,7 @@ func specToUVMCreateOptionsCommon(ctx context.Context, opts *uvm.Options, s *spe
opts.NumaProcessorCounts)
opts.NumaMemoryBlocksCounts = ParseAnnotationCommaSeparatedUint64(ctx, s.Annotations, annotations.NumaCountOfMemoryBlocks,
opts.NumaMemoryBlocksCounts)

maps.Copy(opts.AdditionalHyperVConfig, parseHVSocketServiceTable(ctx, s.Annotations))
}

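For the implicit-topology knobs above, the annotation values are plain strings parsed by the `ParseAnnotations*` helpers: single integers for the per-node maximums and a comma-separated list for the preferred physical nodes. A hedged usage sketch (values are illustrative; the constants are the ones referenced in this diff, assumed to live in the internal annotations package):

```go
import (
	"github.com/Microsoft/hcsshim/internal/annotations"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

func setImplicitNumaAnnotations(s *specs.Spec) {
	if s.Annotations == nil {
		s.Annotations = map[string]string{}
	}
	// At most 4 processors and 2048 MiB of memory per vNUMA node.
	s.Annotations[annotations.NumaMaximumProcessorsPerNode] = "4"
	s.Annotations[annotations.NumaMaximumMemorySizePerNode] = "2048"
	// Prefer carving the vNUMA nodes out of physical nodes 0 and 1.
	s.Annotations[annotations.NumaPreferredPhysicalNodes] = "0,1"
}
```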
4 changes: 2 additions & 2 deletions internal/uvm/create.go
@@ -108,8 +108,8 @@ type Options struct {
AdditionalHyperVConfig map[string]hcsschema.HvSocketServiceConfig

// The following options are for implicit vNUMA topology settings.
- // MaxSizePerNode is the maximum size of memory per vNUMA node.
- MaxSizePerNode uint64
+ // MaxMemorySizePerNumaNode is the maximum size of memory (in MiB) per vNUMA node.
+ MaxMemorySizePerNumaNode uint64
// MaxProcessorsPerNumaNode is the maximum number of processors per vNUMA node.
MaxProcessorsPerNumaNode uint32
// PhysicalNumaNodes are the preferred physical NUMA nodes to map to vNUMA nodes.
2 changes: 1 addition & 1 deletion internal/uvm/create_lcow.go
@@ -596,7 +596,7 @@ func makeLCOWDoc(ctx context.Context, opts *OptionsLCOW, uvm *UtilityVM) (_ *hcs
return nil, err
}

- numa, numaProcessors, err := prepareVNumaTopology(opts.Options)
+ numa, numaProcessors, err := prepareVNumaTopology(ctx, opts.Options)
if err != nil {
return nil, err
}
2 changes: 1 addition & 1 deletion internal/uvm/create_wcow.go
@@ -174,7 +174,7 @@ func prepareCommonConfigDoc(ctx context.Context, uvm *UtilityVM, opts *OptionsWC
Weight: uint64(opts.ProcessorWeight),
}

- numa, numaProcessors, err := prepareVNumaTopology(opts.Options)
+ numa, numaProcessors, err := prepareVNumaTopology(ctx, opts.Options)
if err != nil {
return nil, err
}
66 changes: 49 additions & 17 deletions internal/uvm/vnuma.go
@@ -3,45 +3,68 @@
package uvm

import (
"context"
"fmt"

"github.com/sirupsen/logrus"

hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/osversion"
)

// prepareVNumaTopology creates vNUMA settings for implicit (platform) or explicit (user-defined) topology.
//
- // For implicit topology we look for `MaxProcessorsPerNumaNode`, `MaxSizePerNode` and `preferredNumaNodes` create options. Setting them
- // in HCS doc, will trigger platform to create vNUMA topology based on the given values. Based on experiments, the
- // platform will create an evenly distributed topology based on requested memory and processor count for the HCS VM.
+ // For implicit topology we look for the `MaxProcessorsPerNumaNode`, `MaxMemorySizePerNumaNode` and
+ // `PreferredPhysicalNumaNodes` create options.
+ // Setting them in the HCS doc will trigger the platform to create a vNUMA topology based on the given values.
+ // Based on experiments, the platform will create an evenly distributed topology based on the
+ // requested memory and processor count for the HCS VM.
//
- // For explicit topology we look for `NumaMappedPhysicalNodes`, `NumaProcessorCounts` and `NumaMemoryBlocksCounts` create
- // options. The above options are number slices, where a value at index `i` in each slice represents the corresponding
+ // For explicit topology we look for `NumaMappedPhysicalNodes`, `NumaProcessorCounts` and
+ // `NumaMemoryBlocksCounts` create options.
+ // The above options are number slices, where a value at index `i` in each slice represents the corresponding
// value for the `i`th vNUMA node.
//
// Limitations:
- // - only hcsschema.MemoryBackingType_PHYSICAL is supported
- // - `PhysicalNumaNodes` values at index `i` will be mapped to virtual node number `i`
- // - client is responsible for setting wildcard physical node numbers
- // TODO: Add exact OS build version for vNUMA support.
- func prepareVNumaTopology(opts *Options) (*hcsschema.Numa, *hcsschema.NumaProcessors, error) {
+ //
+ // - only `hcsschema.MemoryBackingType_PHYSICAL` is supported
+ // - `PhysicalNumaNodes` values at index `i` will be mapped to virtual node number `i`
+ // - client is responsible for setting wildcard physical node numbers
+ func prepareVNumaTopology(ctx context.Context, opts *Options) (*hcsschema.Numa, *hcsschema.NumaProcessors, error) {
if opts.MaxProcessorsPerNumaNode == 0 && len(opts.NumaMappedPhysicalNodes) == 0 {
+ // warn if vNUMA settings are partially specified, since it's likely an error on the client's side
+ if opts.MaxMemorySizePerNumaNode > 0 || len(opts.PreferredPhysicalNumaNodes) > 0 {
+ log.G(ctx).WithFields(logrus.Fields{
+ "max-memory-size-per-numa-node": opts.MaxMemorySizePerNumaNode,
+ "max-processors-per-numa-node": opts.MaxProcessorsPerNumaNode,
+ "preferred-physical-numa-nodes": log.Format(ctx, opts.PreferredPhysicalNumaNodes),
+ }).Warn("potentially incomplete implicit vNUMA topology")
+ }
+ if len(opts.NumaProcessorCounts) > 0 || len(opts.NumaMemoryBlocksCounts) > 0 {
+ log.G(ctx).WithFields(logrus.Fields{
+ "numa-mapped-physical-nodes": log.Format(ctx, opts.NumaMappedPhysicalNodes),
+ "numa-processor-counts": log.Format(ctx, opts.NumaProcessorCounts),
+ "numa-memory-block-counts": log.Format(ctx, opts.NumaMemoryBlocksCounts),
+ }).Warn("potentially incomplete explicit vNUMA topology")
+ }
+ // vNUMA settings are missing, return empty topology
return nil, nil, nil
}

+ // TODO: Add exact OS build version for vNUMA support, or check for dedicated NUMA feature.
+ if build := osversion.Build(); build < osversion.V25H1Server {
+ return nil, nil, fmt.Errorf("vNUMA topology is not supported on %d version of Windows", build)
+ }

var preferredNumaNodes []int64
for _, pn := range opts.PreferredPhysicalNumaNodes {
preferredNumaNodes = append(preferredNumaNodes, int64(pn))
}

- build := osversion.Get().Build
- if build < osversion.V25H1Server {
- return nil, nil, fmt.Errorf("vNUMA topology is not supported on %d version of Windows", build)
- }

// Implicit vNUMA topology.
if opts.MaxProcessorsPerNumaNode > 0 {
- if opts.MaxSizePerNode == 0 {
+ if opts.MaxMemorySizePerNumaNode == 0 {
return nil, nil, fmt.Errorf("max size per node must be set when max processors per numa node is set")
}
numaProcessors := &hcsschema.NumaProcessors{
@@ -50,9 +73,15 @@ func prepareVNumaTopology(opts *Options) (*hcsschema.Numa, *hcsschema.NumaProces
},
}
numa := &hcsschema.Numa{
- MaxSizePerNode: opts.MaxSizePerNode,
+ MaxSizePerNode: opts.MaxMemorySizePerNumaNode,
PreferredPhysicalNodes: preferredNumaNodes,
}
+ if entry := log.G(ctx); entry.Logger.IsLevelEnabled(logrus.DebugLevel) {
+ entry.WithFields(logrus.Fields{
+ "numa": log.Format(ctx, numa),
+ "numa-processors": log.Format(ctx, numaProcessors),
+ }).Debug("created implicit NUMA topology")
+ }
return numa, numaProcessors, nil
}

@@ -79,6 +108,9 @@ func prepareVNumaTopology(opts *Options) (*hcsschema.Numa, *hcsschema.NumaProces
}
numa.Settings = append(numa.Settings, nodeTopology)
}
+ if entry := log.G(ctx); entry.Logger.IsLevelEnabled(logrus.DebugLevel) {
+ entry.WithField("numa", log.Format(ctx, numa)).Debug("created explicit NUMA topology")
+ }
return numa, nil, validate(numa)
}

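Putting the two modes together, a sketch of the `uvm.Options` each path consumes (field names are from this PR; the values are illustrative):

```go
// Implicit: the platform distributes the UVM's resources evenly,
// capping each vNUMA node at 4 processors and 2048 MiB of memory.
implicit := &uvm.Options{
	MaxProcessorsPerNumaNode:   4,
	MaxMemorySizePerNumaNode:   2048,
	PreferredPhysicalNumaNodes: []uint32{0, 1},
}

// Explicit: index i of each slice describes vNUMA node i, so vNUMA 0
// maps to physical node 0 with 2 processors and 1024 memory blocks,
// and vNUMA 1 likewise to physical node 1.
explicit := &uvm.Options{
	NumaMappedPhysicalNodes: []uint32{0, 1},
	NumaProcessorCounts:     []uint32{2, 2},
	NumaMemoryBlocksCounts:  []uint64{1024, 1024},
}
```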