diff --git a/.github/workflows/basic-ci.yaml b/.github/workflows/basic-ci.yaml
index e132b7b7..a5214c05 100644
--- a/.github/workflows/basic-ci.yaml
+++ b/.github/workflows/basic-ci.yaml
@@ -59,14 +59,16 @@ jobs:
         run: |
           pushd ndm-vagrant-rancherd
           ./scripts/attach-disk.sh node1
+          sleep 30
           vagrant ssh-config node1 > ../ssh-config
           cp kubeconfig ../kubeconfig
           popd
-          make ci-integration
+          echo Running integration tests
+          NDM_HOME=`pwd` go test -v ./tests/...
       - name: The Test failed
         if: ${{ failure() && steps.basic-test.conclusion == 'failure' }}
         run: |
-          ./ci/scripts/get-ndm-log.sh
+          ./ci/scripts/get-debug-info.sh
   teardown:
     needs: tests
     runs-on:
diff --git a/ci/scripts/get-ndm-log.sh b/ci/scripts/get-debug-info.sh
similarity index 58%
rename from ci/scripts/get-ndm-log.sh
rename to ci/scripts/get-debug-info.sh
index 9fc2b929..7bfe44c7 100755
--- a/ci/scripts/get-ndm-log.sh
+++ b/ci/scripts/get-debug-info.sh
@@ -7,4 +7,8 @@ export KUBECONFIG=kubeconfig
 NDMPOD=$(kubectl get pods -n harvester-system --field-selector spec.nodeName=$TARGETNODE |grep ^harvester-node-disk-manager |awk '{print $1}')
 
 # filter out the redundant Skip log
-kubectl logs $NDMPOD -n harvester-system |grep -v Skip
\ No newline at end of file
+kubectl logs $NDMPOD -n harvester-system |grep -v Skip
+
+# get blockdevices info
+echo "========== Dump blockdevices =========="
+kubectl get blockdevice -n longhorn-system -o yaml
\ No newline at end of file
diff --git a/deploy/charts/harvester-node-disk-manager/templates/daemonset.yaml b/deploy/charts/harvester-node-disk-manager/templates/daemonset.yaml
index b50649c3..eb9b41d3 100644
--- a/deploy/charts/harvester-node-disk-manager/templates/daemonset.yaml
+++ b/deploy/charts/harvester-node-disk-manager/templates/daemonset.yaml
@@ -30,6 +30,7 @@ spec:
         imagePullPolicy: {{ .Values.image.pullPolicy }}
         command:
         - node-disk-manager
+        - --debug
         env:
         {{- with .Values.vendorFilter }}
         - name: NDM_VENDOR_FILTER
diff --git a/pkg/controller/blockdevice/controller.go b/pkg/controller/blockdevice/controller.go
index 3a40c066..b60678aa 100644
--- a/pkg/controller/blockdevice/controller.go
+++ b/pkg/controller/blockdevice/controller.go
@@ -437,7 +437,7 @@ func (c *Controller) unprovisionDeviceFromNode(device *diskv1.BlockDevice) error
 		} else {
 			// Still unprovisioning
 			c.Blockdevices.EnqueueAfter(c.Namespace, device.Name, jitterEnqueueDelay())
-			logrus.Debugf("device %s is unprovisioning", device.Name)
+			logrus.Debugf("device %s is unprovisioning, status: %+v, ScheduledReplica: %d", device.Name, node.Status.DiskStatus[device.Name], len(status.ScheduledReplica))
 		}
 	} else {
 		// Start unprovisioing
diff --git a/tests/integration/add_disk_test.go b/tests/integration/add_disk_test.go
deleted file mode 100644
index d97bc5d1..00000000
--- a/tests/integration/add_disk_test.go
+++ /dev/null
@@ -1,105 +0,0 @@
-package integration
-
-import (
-	"context"
-	"os"
-	"path/filepath"
-	"strings"
-	"testing"
-
-	"github.com/kevinburke/ssh_config"
-	"github.com/melbahja/goph"
-	"github.com/stretchr/testify/require"
-	"github.com/stretchr/testify/suite"
-	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/client-go/tools/clientcmd"
-
-	clientset "github.com/harvester/node-disk-manager/pkg/generated/clientset/versioned"
-)
-
-type Suite struct {
-	suite.Suite
-	SSHClient      *goph.Client
-	clientSet      *clientset.Clientset
-	targetNodeName string
-}
-
-type ProvisionedDisk struct {
-	devPath string
-	UUID    string
-}
-
-func (s *Suite) SetupSuite() {
-	nodeName := ""
-	f, _ := os.Open(filepath.Join(os.Getenv("HOME"), "ssh-config"))
-	cfg, _ := ssh_config.Decode(f)
-	// consider wildcard, so length shoule be 2
-	require.Equal(s.T(), len(cfg.Hosts), 2, "number of Hosts on SSH-config should be 1")
-	for _, host := range cfg.Hosts {
-		if host.String() == "" {
-			// wildcard, continue
-			continue
-		}
-		nodeName = host.Patterns[0].String()
-		break
-	}
-	require.NotEqual(s.T(), nodeName, "", "nodeName should not be empty.")
-	s.targetNodeName = nodeName
-	targetHost, _ := cfg.Get(nodeName, "HostName")
-	targetUser, _ := cfg.Get(nodeName, "User")
-	targetPrivateKey, _ := cfg.Get(nodeName, "IdentityFile")
-	splitedResult := strings.Split(targetPrivateKey, "node-disk-manager/")
-	privateKey := filepath.Join(os.Getenv("HOME"), splitedResult[len(splitedResult)-1])
-	// Start new ssh connection with private key.
-	auth, err := goph.Key(privateKey, "")
-	require.Equal(s.T(), err, nil, "generate ssh auth key should not get error")
-
-	s.SSHClient, err = goph.NewUnknown(targetUser, targetHost, auth)
-	require.Equal(s.T(), err, nil, "New ssh connection should not get error")
-
-	kubeconfig := filepath.Join(os.Getenv("HOME"), "kubeconfig")
-	config, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
-	require.Equal(s.T(), err, nil, "Generate kubeconfig should not get error")
-
-	s.clientSet, err = clientset.NewForConfig(config)
-	require.Equal(s.T(), err, nil, "New clientset should not get error")
-
-}
-
-func (s *Suite) AfterTest(_, _ string) {
-	if s.SSHClient != nil {
-		s.SSHClient.Close()
-	}
-}
-
-func TestAddDisks(t *testing.T) {
-	suite.Run(t, new(Suite))
-}
-
-func (s *Suite) TestAddSingleDisk() {
-	// prepare to check the added disk
-	var provisionedDisk ProvisionedDisk
-	bdi := s.clientSet.HarvesterhciV1beta1().BlockDevices("longhorn-system")
-	bdList, err := bdi.List(context.TODO(), v1.ListOptions{})
-	require.Equal(s.T(), err, nil, "Get BlockdevicesList should not get error")
-	for _, blockdevice := range bdList.Items {
-		if blockdevice.Spec.NodeName != s.targetNodeName {
-			// focus the target node
-			continue
-		}
-		bdStatus := blockdevice.Status
-		if bdStatus.State == "Active" && bdStatus.ProvisionPhase == "Provisioned" {
-			// get from blockdevice resource
-			provisionedDisk.devPath = bdStatus.DeviceStatus.DevPath
-			provisionedDisk.UUID = bdStatus.DeviceStatus.Details.UUID
-
-			// checking with the device on the host
-			cmd := "sudo blkid -s UUID name -o value " + provisionedDisk.devPath
-			out, err := s.SSHClient.Run(cmd)
-			require.Equal(s.T(), err, nil, "Running command `blkid` should not get error")
-			require.NotEqual(s.T(), "", string(out), "blkid command should not return empty, ", provisionedDisk.devPath)
-			convertOutPut := strings.Split(string(out), "\n")[0]
-			require.Equal(s.T(), provisionedDisk.UUID, convertOutPut, "Provisioned disk UUID should be the same")
-		}
-	}
-}
diff --git a/tests/integration/test_0_single_disk_test.go b/tests/integration/test_0_single_disk_test.go
new file mode 100644
index 00000000..3ea7b85d
--- /dev/null
+++ b/tests/integration/test_0_single_disk_test.go
@@ -0,0 +1,150 @@
+package integration
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/kevinburke/ssh_config"
+	"github.com/melbahja/goph"
+	"github.com/stretchr/testify/require"
+	"github.com/stretchr/testify/suite"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/tools/clientcmd"
+
+	diskv1 "github.com/harvester/node-disk-manager/pkg/apis/harvesterhci.io/v1beta1"
+	clientset "github.com/harvester/node-disk-manager/pkg/generated/clientset/versioned"
+)
+
+type SingleDiskSuite struct {
+	suite.Suite
+	SSHClient      *goph.Client
+	clientSet      *clientset.Clientset
+	targetNodeName string
+	targetDiskName string
+}
+
+type ProvisionedDisk struct {
+	devPath string
+	UUID    string
+}
+
+func (s *SingleDiskSuite) SetupSuite() {
+	nodeName := ""
+	f, _ := os.Open(filepath.Join(os.Getenv("NDM_HOME"), "ssh-config"))
+	cfg, _ := ssh_config.Decode(f)
+	// consider wildcard, so length should be 2
+	require.Equal(s.T(), len(cfg.Hosts), 2, "number of Hosts on SSH-config should be 2")
+	for _, host := range cfg.Hosts {
+		if host.String() == "" {
+			// wildcard, continue
+			continue
+		}
+		nodeName = host.Patterns[0].String()
+		break
+	}
+	require.NotEqual(s.T(), nodeName, "", "nodeName should not be empty.")
+	s.targetNodeName = nodeName
+	targetHost, _ := cfg.Get(nodeName, "HostName")
+	targetUser, _ := cfg.Get(nodeName, "User")
+	targetPrivateKey, _ := cfg.Get(nodeName, "IdentityFile")
+	splitedResult := strings.Split(targetPrivateKey, "node-disk-manager/")
+	privateKey := filepath.Join(os.Getenv("NDM_HOME"), splitedResult[len(splitedResult)-1])
+	// Start new ssh connection with private key.
+	auth, err := goph.Key(privateKey, "")
+	require.Equal(s.T(), err, nil, "generate ssh auth key should not get error")
+
+	s.SSHClient, err = goph.NewUnknown(targetUser, targetHost, auth)
+	require.Equal(s.T(), err, nil, "New ssh connection should not get error")
+
+	kubeconfig := filepath.Join(os.Getenv("NDM_HOME"), "kubeconfig")
+	config, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
+	require.Equal(s.T(), err, nil, "Generate kubeconfig should not get error")
+
+	s.clientSet, err = clientset.NewForConfig(config)
+	require.Equal(s.T(), err, nil, "New clientset should not get error")
+}
+
+func (s *SingleDiskSuite) AfterTest(_, _ string) {
+	if s.SSHClient != nil {
+		s.SSHClient.Close()
+	}
+}
+
+func TestSingleDiskOperation(t *testing.T) {
+	suite.Run(t, new(SingleDiskSuite))
+}
+
+func (s *SingleDiskSuite) Test_0_AutoProvisionSingleDisk() {
+	// prepare to check the added disk
+	var provisionedDisk ProvisionedDisk
+	bdi := s.clientSet.HarvesterhciV1beta1().BlockDevices("longhorn-system")
+	bdList, err := bdi.List(context.TODO(), v1.ListOptions{})
+	require.Equal(s.T(), err, nil, "Get BlockdevicesList should not get error")
+	for _, blockdevice := range bdList.Items {
+		if blockdevice.Spec.NodeName != s.targetNodeName {
+			// focus the target node
+			continue
+		}
+		bdStatus := blockdevice.Status
+		if bdStatus.State == "Active" && bdStatus.ProvisionPhase == "Provisioned" {
+			s.targetDiskName = blockdevice.Name
+			// get from blockdevice resource
+			provisionedDisk.devPath = bdStatus.DeviceStatus.DevPath
+			provisionedDisk.UUID = bdStatus.DeviceStatus.Details.UUID
+
+			// checking with the device on the host
+			cmd := "sudo blkid -s UUID name -o value " + provisionedDisk.devPath
+			out, err := s.SSHClient.Run(cmd)
+			require.Equal(s.T(), err, nil, "Running command `blkid` should not get error")
+			require.NotEqual(s.T(), "", string(out), "blkid command should not return empty, ", provisionedDisk.devPath)
+			convertOutPut := strings.Split(string(out), "\n")[0]
+			require.Equal(s.T(), provisionedDisk.UUID, convertOutPut, "Provisioned disk UUID should be the same")
+		}
+	}
+}
+
+func (s *SingleDiskSuite) Test_1_UnprovisionSingleDisk() {
+	require.NotEqual(s.T(), s.targetDiskName, "", "target disk name should not be empty before we do the remove test")
+	bdi := s.clientSet.HarvesterhciV1beta1().BlockDevices("longhorn-system")
+	curBlockdevice, err := bdi.Get(context.TODO(), s.targetDiskName, v1.GetOptions{})
+	require.Equal(s.T(), err, nil, "Get Blockdevices should not get error")
+
+	newBlockdevice := curBlockdevice.DeepCopy()
+	newBlockdevice.Spec.FileSystem.Provisioned = false
+	bdi.Update(context.TODO(), newBlockdevice, v1.UpdateOptions{})
+
+	// sleep 30 seconds to wait for the controller to handle it. The jitter is between 7 and 13 seconds, so 30 seconds is enough to run twice.
+	time.Sleep(30 * time.Second)
+
+	// check for the removed status
+	curBlockdevice, err = bdi.Get(context.TODO(), s.targetDiskName, v1.GetOptions{})
+	require.Equal(s.T(), err, nil, "Get BlockdevicesList should not get error before we want to check remove")
+	require.Equal(s.T(), curBlockdevice.Status.DeviceStatus.FileSystem.MountPoint, "", "Mountpoint should be empty after we remove disk!")
+	require.Equal(s.T(), diskv1.ProvisionPhaseUnprovisioned, curBlockdevice.Status.ProvisionPhase, "Block device provisionPhase should be Unprovisioned")
+
+}
+
+func (s *SingleDiskSuite) Test_2_ManuallyProvisionSingleDisk() {
+	require.NotEqual(s.T(), s.targetDiskName, "", "target disk name should not be empty before we do the provision test")
+	bdi := s.clientSet.HarvesterhciV1beta1().BlockDevices("longhorn-system")
+	curBlockdevice, err := bdi.Get(context.TODO(), s.targetDiskName, v1.GetOptions{})
+	require.Equal(s.T(), err, nil, "Get Blockdevices should not get error")
+
+	newBlockdevice := curBlockdevice.DeepCopy()
+	newBlockdevice.Spec.FileSystem.Provisioned = true
+	bdi.Update(context.TODO(), newBlockdevice, v1.UpdateOptions{})
+
+	// sleep 3 seconds to wait for the controller to handle it
+	time.Sleep(3 * time.Second)
+
+	// check for the provisioned status
+	curBlockdevice, err = bdi.Get(context.TODO(), s.targetDiskName, v1.GetOptions{})
+	require.Equal(s.T(), err, nil, "Get BlockdevicesList should not get error before we want to check provision")
+	require.NotEqual(s.T(), curBlockdevice.Status.DeviceStatus.FileSystem.MountPoint, "", "Mountpoint should not be empty after we provision disk!")
+	require.Equal(s.T(), diskv1.ProvisionPhaseProvisioned, curBlockdevice.Status.ProvisionPhase, "Block device provisionPhase should be Provisioned")
+	require.Equal(s.T(), diskv1.BlockDeviceActive, curBlockdevice.Status.State, "Block device State should be Active")
+}
diff --git a/tests/integration/test_1_disk_hotplug_test.go b/tests/integration/test_1_disk_hotplug_test.go
new file mode 100644
index 00000000..37f55422
--- /dev/null
+++ b/tests/integration/test_1_disk_hotplug_test.go
@@ -0,0 +1,160 @@
+package integration
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/kevinburke/ssh_config"
+	"github.com/melbahja/goph"
+	"github.com/stretchr/testify/require"
+	"github.com/stretchr/testify/suite"
+	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/tools/clientcmd"
+
+	diskv1 "github.com/harvester/node-disk-manager/pkg/apis/harvesterhci.io/v1beta1"
+	clientset "github.com/harvester/node-disk-manager/pkg/generated/clientset/versioned"
+)
+
+/*
+ * We have some assumptions for the hotplug test:
+ * 1. we will reuse the disk that is added in the initial operation of the CI test
+ * 2. we use the virsh command to remove/add back the disk directly
+ *
+ * NOTE: The default qcow2 and xml location (created by the initial operation) is `/tmp/hotplug_disks/`.
+ *       File names are `node1-sda.qcow2` and `node1-sda.xml`.
+ *       The target node name is `ndm-vagrant-rancherd_node1`.
+ */ + +const ( + hotplugTargetNodeName = "ndm-vagrant-rancherd_node1" + hotplugDiskXMLFileName = "/tmp/hotplug_disks/node1-sda.xml" + hotplugTargetDiskName = "sda" +) + +type HotPlugTestSuite struct { + suite.Suite + SSHClient *goph.Client + clientSet *clientset.Clientset + targetNodeName string + targetDiskName string +} + +func (s *HotPlugTestSuite) SetupSuite() { + nodeName := "" + f, _ := os.Open(filepath.Join(os.Getenv("NDM_HOME"), "ssh-config")) + cfg, _ := ssh_config.Decode(f) + // consider wildcard, so length shoule be 2 + require.Equal(s.T(), len(cfg.Hosts), 2, "number of Hosts on SSH-config should be 1") + for _, host := range cfg.Hosts { + if host.String() == "" { + // wildcard, continue + continue + } + nodeName = host.Patterns[0].String() + break + } + require.NotEqual(s.T(), nodeName, "", "nodeName should not be empty.") + s.targetNodeName = nodeName + targetHost, _ := cfg.Get(nodeName, "HostName") + targetUser, _ := cfg.Get(nodeName, "User") + targetPrivateKey, _ := cfg.Get(nodeName, "IdentityFile") + splitedResult := strings.Split(targetPrivateKey, "node-disk-manager/") + privateKey := filepath.Join(os.Getenv("NDM_HOME"), splitedResult[len(splitedResult)-1]) + // Start new ssh connection with private key. + auth, err := goph.Key(privateKey, "") + require.Equal(s.T(), err, nil, "generate ssh auth key should not get error") + + s.SSHClient, err = goph.NewUnknown(targetUser, targetHost, auth) + require.Equal(s.T(), err, nil, "New ssh connection should not get error") + + kubeconfig := filepath.Join(os.Getenv("NDM_HOME"), "kubeconfig") + config, err := clientcmd.BuildConfigFromFlags("", kubeconfig) + require.Equal(s.T(), err, nil, "Generate kubeconfig should not get error") + + s.clientSet, err = clientset.NewForConfig(config) + require.Equal(s.T(), err, nil, "New clientset should not get error") +} + +func (s *HotPlugTestSuite) AfterTest(_, _ string) { + if s.SSHClient != nil { + s.SSHClient.Close() + } +} + +func TestHotPlugDisk(t *testing.T) { + suite.Run(t, new(HotPlugTestSuite)) +} + +func (s *HotPlugTestSuite) Test_0_PreCheckForDiskCount() { + bdi := s.clientSet.HarvesterhciV1beta1().BlockDevices("longhorn-system") + bdList, err := bdi.List(context.TODO(), v1.ListOptions{}) + require.Equal(s.T(), err, nil, "Get BlockdevicesList should not get error") + diskCount := 0 + for _, blockdevice := range bdList.Items { + if blockdevice.Spec.NodeName != s.targetNodeName { + // focus the target node + continue + } + bdStatus := blockdevice.Status + if bdStatus.State == "Active" && bdStatus.ProvisionPhase == "Provisioned" { + diskCount++ + s.targetDiskName = blockdevice.Name + } + } + require.Equal(s.T(), 1, diskCount, "We should only have one disk.") +} + +func (s *HotPlugTestSuite) Test_1_HotPlugRemoveDisk() { + // remove disk dynamically + cmd := fmt.Sprintf("virsh detach-disk %s %s --live", hotplugTargetNodeName, hotplugTargetDiskName) + _, _, err := doCommand(cmd) + require.Equal(s.T(), err, nil, "Running command `virsh detach-disk` should not get error") + + // wait for controller handling + time.Sleep(5 * time.Second) + + // check disk status + require.NotEqual(s.T(), s.targetDiskName, "", "target disk name should not be empty before we start hotplug (remove) test") + bdi := s.clientSet.HarvesterhciV1beta1().BlockDevices("longhorn-system") + curBlockdevice, err := bdi.Get(context.TODO(), s.targetDiskName, v1.GetOptions{}) + require.Equal(s.T(), nil, err, "Get Blockdevices should not get error") + + require.Equal(s.T(), diskv1.BlockDeviceInactive, curBlockdevice.Status.State, 
"Disk status should be inactive after we remove disk") + +} + +func (s *HotPlugTestSuite) Test_2_HotPlugAddDisk() { + // remove disk dynamically + cmd := fmt.Sprintf("virsh attach-device --domain %s --file %s --live", hotplugTargetNodeName, hotplugDiskXMLFileName) + _, _, err := doCommand(cmd) + require.Equal(s.T(), err, nil, "Running command `virsh attach-device` should not get error") + + // wait for controller handling + time.Sleep(5 * time.Second) + + // check disk status + require.NotEqual(s.T(), s.targetDiskName, "", "target disk name should not be empty before we start hotplug (add) test") + bdi := s.clientSet.HarvesterhciV1beta1().BlockDevices("longhorn-system") + curBlockdevice, err := bdi.Get(context.TODO(), s.targetDiskName, v1.GetOptions{}) + require.Equal(s.T(), err, nil, "Get Blockdevices should not get error") + + require.Equal(s.T(), curBlockdevice.Status.State, diskv1.BlockDeviceActive, "Disk status should be inactive after we add disk") + +} + +func doCommand(cmdString string) (string, string, error) { + var stdout bytes.Buffer + var stderr bytes.Buffer + cmd := exec.Command("bash", "-c", cmdString) + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := cmd.Run() + return stdout.String(), stderr.String(), err +}