diff --git a/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml b/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml
index d3a5b9df80ad..264e50ea9adb 100644
--- a/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml
+++ b/charts/ceph-csi-cephfs/templates/nodeplugin-daemonset.yaml
@@ -85,6 +85,10 @@ spec:
             - "--drivername=$(DRIVER_NAME)"
 {{- if .Values.nodeplugin.profiling.enabled }}
             - "--enableprofiling={{ .Values.nodeplugin.profiling.enabled }}"
+{{- end }}
+            - "--enable-read-affinity={{ and .Values.readAffinity .Values.readAffinity.enabled }}"
+{{- if and .Values.readAffinity .Values.readAffinity.enabled }}
+            - "--crush-location-labels={{ .Values.readAffinity.crushLocationLabels | join "," }}"
 {{- end }}
           env:
             - name: POD_IP
diff --git a/charts/ceph-csi-cephfs/values.yaml b/charts/ceph-csi-cephfs/values.yaml
index 05bf0beab5b0..34f9b72dae22 100644
--- a/charts/ceph-csi-cephfs/values.yaml
+++ b/charts/ceph-csi-cephfs/values.yaml
@@ -222,6 +222,17 @@ provisioner:
   affinity: {}
 
+# readAffinity:
+# Enable read affinity for CephFS volumes. Recommended to
+# set to true if running kernel 5.8 or newer.
+#   enabled: false
+# Define which node labels to use as CRUSH location.
+# This should correspond to the values set in the CRUSH map.
+# NOTE: the value here serves as an example
+#   crushLocationLabels:
+#     - topology.kubernetes.io/region
+#     - topology.kubernetes.io/zone
+
 # Mount the host /etc/selinux inside pods to support
 # selinux-enabled filesystems
 selinuxMount: true
diff --git a/internal/cephfs/driver.go b/internal/cephfs/driver.go
index 7c6f1b18741e..ccaa264a9086 100644
--- a/internal/cephfs/driver.go
+++ b/internal/cephfs/driver.go
@@ -76,13 +76,17 @@ func NewNodeServer(
 	topology map[string]string,
 	kernelMountOptions string,
 	fuseMountOptions string,
+	crushLocationMap map[string]string,
 ) *NodeServer {
-	return &NodeServer{
+	ns := &NodeServer{
 		DefaultNodeServer:  csicommon.NewDefaultNodeServer(d, t, topology),
 		VolumeLocks:        util.NewVolumeLocks(),
 		kernelMountOptions: kernelMountOptions,
 		fuseMountOptions:   fuseMountOptions,
 	}
+	ns.appendReadAffinityMapOptions(crushLocationMap)
+
+	return ns
 }
 
 // Run start a non-blocking grpc controller,node and identityserver for
@@ -90,6 +94,7 @@ func NewNodeServer(
 func (fs *Driver) Run(conf *util.Config) {
 	var err error
 	var topology map[string]string
+	var crushLocationMap map[string]string
 
 	// Configuration
 	if err = mounter.LoadAvailableMounters(conf); err != nil {
@@ -100,6 +105,14 @@ func (fs *Driver) Run(conf *util.Config) {
 	if conf.InstanceID != "" {
 		CSIInstanceID = conf.InstanceID
 	}
+
+	if conf.EnableReadAffinity {
+		crushLocationMap, err = util.GetCrushLocationMap(conf.CrushLocationLabels, conf.NodeID)
+		if err != nil {
+			log.FatalLogMsg(err.Error())
+		}
+	}
+
 	// Create an instance of the volume journal
 	store.VolJournal = journal.NewCSIVolumeJournalWithNamespace(CSIInstanceID,
 		fsutil.RadosNamespace)
@@ -136,7 +149,7 @@ func (fs *Driver) Run(conf *util.Config) {
 		if err != nil {
 			log.FatalLogMsg(err.Error())
 		}
-		fs.ns = NewNodeServer(fs.cd, conf.Vtype, topology, conf.KernelMountOptions, conf.FuseMountOptions)
+		fs.ns = NewNodeServer(fs.cd, conf.Vtype, topology, conf.KernelMountOptions, conf.FuseMountOptions, crushLocationMap)
 	}
 
 	if conf.IsControllerServer {
@@ -149,7 +162,7 @@ func (fs *Driver) Run(conf *util.Config) {
 		if err != nil {
 			log.FatalLogMsg(err.Error())
 		}
-		fs.ns = NewNodeServer(fs.cd, conf.Vtype, topology, conf.KernelMountOptions, conf.FuseMountOptions)
+		fs.ns = NewNodeServer(fs.cd, conf.Vtype, topology, conf.KernelMountOptions, conf.FuseMountOptions, crushLocationMap)
 		fs.cs = NewControllerServer(fs.cd)
 	}
 
diff --git a/internal/cephfs/nodeserver.go b/internal/cephfs/nodeserver.go
index 7287cde7605b..057b24f6161f 100644
--- a/internal/cephfs/nodeserver.go
+++ b/internal/cephfs/nodeserver.go
@@ -716,3 +716,25 @@
 
 	return nil, status.Errorf(codes.InvalidArgument, "targetpath %q is not a directory or device", targetPath)
 }
+
+// appendReadAffinityMapOptions appends the read affinity mount options
+// to the kernel mount options when a CRUSH location map is available.
+func (ns *NodeServer) appendReadAffinityMapOptions(crushLocationMap map[string]string) {
+	if len(crushLocationMap) == 0 {
+		return
+	}
+
+	var b strings.Builder
+	b.WriteString("read_from_replica=localize,crush_location=")
+	first := true
+	for key, val := range crushLocationMap {
+		if first {
+			b.WriteString(fmt.Sprintf("%s:%s", key, val))
+			first = false
+		} else {
+			b.WriteString(fmt.Sprintf("|%s:%s", key, val))
+		}
+	}
+	options := []string{ns.kernelMountOptions, b.String()}
+	ns.kernelMountOptions = strings.Join(options, ",")
+}
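
For reviewers, a minimal standalone sketch of the mount-option string this change produces. The helper name `buildReadAffinityOptions`, the `ms_mode=crc` base option, and the `zone:zone1` location are illustrative assumptions, not part of the change; the real `appendReadAffinityMapOptions` mutates `ns.kernelMountOptions` in place. The sketch also guards against a leading comma when no kernel mount options are set, which is worth double-checking in the node server code.

```go
package main

import (
	"fmt"
	"strings"
)

// buildReadAffinityOptions mirrors the string building in
// appendReadAffinityMapOptions: it turns a CRUSH location map into the
// "read_from_replica=localize,crush_location=..." kernel mount options and
// appends them to any existing options.
func buildReadAffinityOptions(kernelMountOptions string, crushLocationMap map[string]string) string {
	if len(crushLocationMap) == 0 {
		return kernelMountOptions
	}

	var b strings.Builder
	b.WriteString("read_from_replica=localize,crush_location=")
	first := true
	for key, val := range crushLocationMap {
		if !first {
			b.WriteString("|")
		}
		fmt.Fprintf(&b, "%s:%s", key, val)
		first = false
	}

	// Avoid a leading comma when there are no pre-existing kernel mount options.
	if kernelMountOptions == "" {
		return b.String()
	}

	return strings.Join([]string{kernelMountOptions, b.String()}, ",")
}

func main() {
	// Hypothetical inputs: one existing kernel mount option and a single-entry
	// CRUSH location map (a single entry keeps the output deterministic).
	opts := buildReadAffinityOptions("ms_mode=crc", map[string]string{"zone": "zone1"})
	fmt.Println(opts)
	// Output: ms_mode=crc,read_from_replica=localize,crush_location=zone:zone1
}
```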
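The new code path relies on `util.GetCrushLocationMap(conf.CrushLocationLabels, conf.NodeID)`, which is not part of this diff. Below is a rough sketch of the assumed behaviour, namely that the CRUSH location key is taken from the label name after the last `/` (for example `topology.kubernetes.io/zone` becomes `zone`). The function name `crushLocationMapFromLabels` and the sample labels are hypothetical and only mirror the examples in `values.yaml`.

```go
package main

import (
	"fmt"
	"strings"
)

// crushLocationMapFromLabels is an illustrative stand-in for the helper used
// by the driver. It assumes the CRUSH location key is the part of the label
// name after the last "/" and the value is the node's label value. The real
// util.GetCrushLocationMap is not shown in this diff.
func crushLocationMapFromLabels(crushLocationLabels string, nodeLabels map[string]string) map[string]string {
	locationMap := make(map[string]string)
	for _, label := range strings.Split(crushLocationLabels, ",") {
		value, ok := nodeLabels[label]
		if !ok {
			continue
		}
		key := label
		if idx := strings.LastIndex(label, "/"); idx != -1 {
			key = label[idx+1:]
		}
		locationMap[key] = value
	}

	return locationMap
}

func main() {
	// Hypothetical node labels, matching the example labels in values.yaml.
	nodeLabels := map[string]string{
		"topology.kubernetes.io/region": "east",
		"topology.kubernetes.io/zone":   "east-1",
	}
	fmt.Println(crushLocationMapFromLabels(
		"topology.kubernetes.io/region,topology.kubernetes.io/zone", nodeLabels))
	// Prints: map[region:east zone:east-1]
}
```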