Skip to content

Adding application high availability check in operator upgrade tests #2039

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;

import oracle.weblogic.kubernetes.actions.impl.OperatorParams;
Expand All @@ -19,6 +19,8 @@
import oracle.weblogic.kubernetes.annotations.tags.MustNotRunInParallel;
import oracle.weblogic.kubernetes.logging.LoggingFacade;
import oracle.weblogic.kubernetes.utils.CleanupUtil;
import oracle.weblogic.kubernetes.utils.DeployUtil;
import oracle.weblogic.kubernetes.utils.ExecResult;
import org.awaitility.core.ConditionFactory;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
Expand All @@ -32,18 +34,23 @@
import static java.util.concurrent.TimeUnit.SECONDS;
import static oracle.weblogic.kubernetes.TestConstants.ADMIN_PASSWORD_DEFAULT;
import static oracle.weblogic.kubernetes.TestConstants.ADMIN_USERNAME_DEFAULT;
import static oracle.weblogic.kubernetes.TestConstants.K8S_NODEPORT_HOST;
import static oracle.weblogic.kubernetes.TestConstants.OPERATOR_CHART_DIR;
import static oracle.weblogic.kubernetes.TestConstants.OPERATOR_GITHUB_CHART_REPO_URL;
import static oracle.weblogic.kubernetes.TestConstants.OPERATOR_RELEASE_NAME;
import static oracle.weblogic.kubernetes.TestConstants.RESULTS_ROOT;
import static oracle.weblogic.kubernetes.TestConstants.WDT_BASIC_IMAGE_NAME;
import static oracle.weblogic.kubernetes.TestConstants.WDT_BASIC_IMAGE_TAG;
import static oracle.weblogic.kubernetes.actions.ActionConstants.ITTESTS_DIR;
import static oracle.weblogic.kubernetes.actions.ActionConstants.RESOURCE_DIR;
import static oracle.weblogic.kubernetes.actions.TestActions.getOperatorContainerImageName;
import static oracle.weblogic.kubernetes.actions.TestActions.getOperatorImageName;
import static oracle.weblogic.kubernetes.actions.TestActions.getServiceNodePort;
import static oracle.weblogic.kubernetes.actions.TestActions.uninstallOperator;
import static oracle.weblogic.kubernetes.assertions.TestAssertions.adminNodePortAccessible;
import static oracle.weblogic.kubernetes.assertions.TestAssertions.appAccessibleInPod;
import static oracle.weblogic.kubernetes.assertions.TestAssertions.checkHelmReleaseRevision;
import static oracle.weblogic.kubernetes.utils.CommonTestUtils.checkAppIsRunning;
import static oracle.weblogic.kubernetes.utils.CommonTestUtils.checkPodReady;
import static oracle.weblogic.kubernetes.utils.CommonTestUtils.checkServiceExists;
import static oracle.weblogic.kubernetes.utils.CommonTestUtils.createOcirRepoSecret;
Expand All @@ -56,6 +63,8 @@
import static oracle.weblogic.kubernetes.utils.ThreadSafeLogger.getLogger;
import static org.awaitility.Awaitility.with;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

Expand All @@ -68,8 +77,8 @@
@MustNotRunInParallel
public class ItOperatorUpgrade {

private static ConditionFactory withStandardRetryPolicy = null;
private static Map<String, Object> secretNameMap;
private static ConditionFactory withStandardRetryPolicy;
private static ConditionFactory withQuickRetryPolicy;
private static LoggingFacade logger = null;
private String domainUid = "domain1";
private String adminServerPodName = domainUid + "-admin-server";
Expand All @@ -90,6 +99,11 @@ public static void init() {
.and().with().pollInterval(10, SECONDS)
.atMost(5, MINUTES).await();

// create a reusable quick retry policy
withQuickRetryPolicy = with().pollDelay(0, SECONDS)
.and().with().pollInterval(4, SECONDS)
.atMost(10, SECONDS).await();

}

/**
Expand Down Expand Up @@ -123,6 +137,8 @@ public void testOperatorUpgradeFrom2_6_0(@Namespaces(3) List<String> namespaces)
/**
* Operator upgrade from 3.0.0 to latest.
* Install 3.0.0 Operator from GitHub chart repository and create a domain.
* Deploy an application to the cluster in domain and verify the application can be
* accessed while the operator is upgraded and after the upgrade.
* Upgrade operator with latest Operator image and verify CRD version and image are updated
* and the domain can be managed by scaling the cluster using operator REST api.
*/
Expand All @@ -137,6 +153,8 @@ public void testOperatorUpgradeFrom3_0_0(@Namespaces(3) List<String> namespaces)
/**
* Operator upgrade from 3.0.1 to latest.
* Install 3.0.1 Operator from GitHub chart repository and create a domain.
* Deploy an application to the cluster in domain and verify the application can be
* accessed while the operator is upgraded and after the upgrade.
* Upgrade operator with latest Operator image and verify CRD version and image are updated
* and the domain can be managed by scaling the cluster using operator REST api.
*/
Expand All @@ -152,6 +170,8 @@ public void testOperatorUpgradeFrom3_0_1(@Namespaces(3) List<String> namespaces)
/**
* Operator upgrade from 3.0.2 to latest.
* Install 3.0.2 Operator from GitHub chart repository and create a domain.
* Deploy an application to the cluster in domain and verify the application can be
* accessed while the operator is upgraded and after the upgrade.
* Upgrade operator with latest Operator image and verify CRD version and image are updated
* and the domain can be managed by scaling the cluster using operator REST api.
*/
Expand All @@ -163,13 +183,32 @@ public void testOperatorUpgradeFrom3_0_2(@Namespaces(3) List<String> namespaces)
upgradeOperator("3.0.2", true);
}


/**
* Operator upgrade from 3.0.3 to latest.
* Install 3.0.3 Operator from GitHub chart repository and create a domain.
* Deploy an application to the cluster in domain and verify the application can be
* accessed while the operator is upgraded and after the upgrade.
* Upgrade operator with latest Operator image and verify CRD version and image are updated
* and the domain can be managed by scaling the cluster using operator REST API.
*
* @param namespaces namespaces supplied to the test (presumably three, per
*     {@code @Namespaces(3)} - confirm against the extension that resolves this parameter);
*     they are stored on the instance so that {@code tearDown} can clean them up
*/
@Test
@DisplayName("Upgrade Operator from 3.0.3 to latest")
@MustNotRunInParallel
public void testOperatorUpgradeFrom3_0_3(@Namespaces(3) List<String> namespaces) {
// keep a reference for tearDown(), which passes this list to CleanupUtil.cleanup
this.namespaces = namespaces;
// second argument is useHelmUpgrade: take the helm-upgrade path of upgradeOperator
upgradeOperator("3.0.3", true);
}

/**
* Cleanup Kubernetes artifacts in the namespaces used by the test and
* delete CRD.
*/
@AfterEach
public void tearDown() {
if (System.getenv("SKIP_CLEANUP") == null) {
if (System.getenv("SKIP_CLEANUP") == null
|| (System.getenv("SKIP_CLEANUP") != null
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the point of checking for both null and not null? If it's null, isn't the next statement going to throw an NPE?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarified on Slack.

&& System.getenv("SKIP_CLEANUP").equalsIgnoreCase("false"))) {
CleanupUtil.cleanup(namespaces);
new Command()
.withParams(new CommandParams()
Expand Down Expand Up @@ -215,36 +254,72 @@ private void upgradeOperator(String operatorVersion, boolean useHelmUpgrade) {
domainNamespace, operatorVersion, TestConstants.OLD_DEFAULT_EXTERNAL_SERVICE_NAME_SUFFIX);

if (useHelmUpgrade) {
// upgrade to latest operator
HelmParams upgradeHelmParams = new HelmParams()
.releaseName(OPERATOR_RELEASE_NAME)
.namespace(opNamespace)
.chartDir(OPERATOR_CHART_DIR)
.repoUrl(null)
.chartVersion(null)
.chartName(null);

// operator chart values
OperatorParams opParams = new OperatorParams()
.helmParams(upgradeHelmParams)
.image(latestOperatorImageName)
.externalRestEnabled(true);

assertTrue(upgradeAndVerifyOperator(opNamespace, opParams),
String.format("Failed to upgrade operator in namespace %s", opNamespace));

// check operator image name after upgrade
logger.info("Checking image name in operator container ");
withStandardRetryPolicy
.conditionEvaluationListener(
condition -> logger.info("Checking operator image name in namespace {0} after upgrade "
+ "(elapsed time {1}ms, remaining time {2}ms)",
opNamespace1,
condition.getElapsedTimeInMS(),
condition.getRemainingTimeInMS()))
.until(assertDoesNotThrow(() -> getOpContainerImageName(opNamespace1),
"Exception while getting the operator image name"));

// application high availability check only for 3.x releases and later
// deploy application and access the application once to make sure the app is accessible
deployAndAccessApplication(domainNamespace);

// start a new thread to collect the availability data of the application while the
// main thread performs operator upgrade
List<Integer> appAvailability = new ArrayList<Integer>();
logger.info("Start a thread to keep track of the application's availability");
Thread accountingThread =
new Thread(
() -> {
collectAppAvailability(
domainNamespace,
opNamespace1,
appAvailability,
managedServerPodNamePrefix,
replicaCount,
"8001",
"testwebapp/index.jsp");
});
accountingThread.start();

try {
// upgrade to latest operator
HelmParams upgradeHelmParams = new HelmParams()
.releaseName(OPERATOR_RELEASE_NAME)
.namespace(opNamespace)
.chartDir(OPERATOR_CHART_DIR)
.repoUrl(null)
.chartVersion(null)
.chartName(null);

// operator chart values
OperatorParams opParams = new OperatorParams()
.helmParams(upgradeHelmParams)
.image(latestOperatorImageName)
.externalRestEnabled(true);

assertTrue(upgradeAndVerifyOperator(opNamespace, opParams),
String.format("Failed to upgrade operator in namespace %s", opNamespace));

// check operator image name after upgrade
logger.info("Checking image name in operator container ");
withStandardRetryPolicy
.conditionEvaluationListener(
condition -> logger.info("Checking operator image name in namespace {0} after upgrade "
+ "(elapsed time {1}ms, remaining time {2}ms)",
opNamespace1,
condition.getElapsedTimeInMS(),
condition.getRemainingTimeInMS()))
.until(assertDoesNotThrow(() -> getOpContainerImageName(opNamespace1),
"Exception while getting the operator image name"));
} finally {
if (accountingThread != null) {
try {
accountingThread.join();
} catch (InterruptedException ie) {
// do nothing
}
// check the application availability data that we have collected, and see if
// the application has been available all the time during the upgrade
logger.info("Verify that the application was available when the operator was being upgraded");
assertTrue(appAlwaysAvailable(appAvailability),
"Application was not always available when the operator was getting upgraded");
}
}
} else {
opNamespace = opNamespace2;
opServiceAccount = opNamespace2 + "-sa";
Expand Down Expand Up @@ -402,4 +477,85 @@ private Callable<Boolean> getOpContainerImageName(String namespace) {
};
}

/**
 * Deploy the {@code testwebapp} archive to cluster {@code cluster-1} through the admin
 * server's external "default" node port and wait until the application answers in every
 * managed server pod.
 *
 * <p>The deployment is expected to report HTTP status 202 on stdout; any other value fails
 * the test. Readiness of the application is then checked pod by pod on port 8001 using the
 * quick retry policy.
 *
 * @param namespace namespace in which the domain is running
 */
private void deployAndAccessApplication(String namespace) {
  logger.info("Getting node port for admin server default channel");
  // the service-name lookup stays inside the lambda so that any exception it raises is
  // reported through assertDoesNotThrow as well
  final int adminNodePort = assertDoesNotThrow(
      () -> {
        String externalServiceName = getExternalServicePodName(
            adminServerPodName, TestConstants.OLD_DEFAULT_EXTERNAL_SERVICE_NAME_SUFFIX);
        return getServiceNodePort(namespace, externalServiceName, "default");
      },
      "Getting admin server node port failed");
  assertNotEquals(-1, adminNodePort, "admin server default node port is not valid");

  final Path warFile = Paths.get(ITTESTS_DIR, "../src/integration-tests/apps/testwebapp.war");
  logger.info("Deploying application {0} to domain {1} cluster target cluster-1 in namespace {2}",
      warFile, domainUid, namespace);
  final ExecResult deployResult = DeployUtil.deployUsingRest(
      K8S_NODEPORT_HOST,
      String.valueOf(adminNodePort),
      ADMIN_USERNAME_DEFAULT,
      ADMIN_PASSWORD_DEFAULT,
      "cluster-1",
      warFile,
      null,
      "testwebapp");
  assertNotNull(deployResult, "Application deployment failed");
  logger.info("Application deployment returned {0}", deployResult.toString());
  assertEquals("202", deployResult.stdout(), "Deployment didn't return HTTP status code 202");

  // managed server pods are numbered from 1 to replicaCount
  logger.info("Check and wait for the application to become ready");
  int serverIndex = 1;
  while (serverIndex <= replicaCount) {
    final String podName = managedServerPodNamePrefix + serverIndex;
    checkAppIsRunning(withQuickRetryPolicy, namespace, podName,
        "8001", "testwebapp/index.jsp", podName);
    serverIndex++;
  }
}

/**
 * Record the application's availability while the operator upgrade is happening and once
 * more after the upgrade is complete, by accessing the application inside every managed
 * server pod.
 *
 * <p>Each round first samples the Helm release revision and then probes all pods, so the
 * round in which the upgrade is first seen as complete is also the final, post-upgrade
 * probe. One entry is appended to {@code appAvailability} per pod per round: {@code 1} when
 * the application was accessible, {@code 0} when it was not.
 *
 * <p>Collection stops without a post-upgrade round if the calling thread is interrupted or
 * if the upgrade has not been observed within 15 minutes, so that a failed upgrade cannot
 * leave this method (and any thread joining on it) blocked forever.
 *
 * @param domainNamespace namespace of the domain whose pods are probed
 * @param operatorNamespace namespace of the operator Helm release being upgraded
 * @param appAvailability list that receives the probe results; mutated by this method
 * @param managedServerPrefix pod name prefix; pods are named prefix + 1 .. prefix + replicaCount
 * @param replicaCount number of managed server pods to probe in each round
 * @param internalPort port on which the application is accessed inside the pod
 * @param appPath path of the application resource to access
 */
private static void collectAppAvailability(
    String domainNamespace,
    String operatorNamespace,
    List<Integer> appAvailability,
    String managedServerPrefix,
    int replicaCount,
    String internalPort,
    String appPath
) {
  // upper bound for the whole collection; generous compared to the retry policies used for
  // the upgrade itself, it only exists to stop an otherwise endless loop
  final long maxCollectionNanos = 15L * 60L * 1_000_000_000L;
  final long startNanos = System.nanoTime();

  boolean isUpgradeComplete;
  do {
    // revision "2" is what this test expects after the first upgrade of a fresh install
    // (presumably Helm numbers the install as revision 1 - see Helm documentation)
    isUpgradeComplete = checkHelmReleaseRevision(OPERATOR_RELEASE_NAME, operatorNamespace, "2");

    for (int i = 1; i <= replicaCount; i++) {
      String podName = managedServerPrefix + i;
      if (appAccessibleInPod(domainNamespace, podName, internalPort, appPath, podName)) {
        appAvailability.add(1);
        logger.fine("application is accessible in pod " + podName);
      } else {
        appAvailability.add(0);
        logger.fine("application is not accessible in pod " + podName);
      }
    }

    if (!isUpgradeComplete) {
      boolean timedOut = System.nanoTime() - startNanos > maxCollectionNanos;
      if (timedOut || Thread.currentThread().isInterrupted()) {
        logger.warning("Stopped collecting application availability before the operator "
            + "upgrade was observed to be complete");
        return;
      }
    }
  } while (!isUpgradeComplete);

  logger.info("Upgrade is complete and app is accessed after upgrade");
}

/**
 * Tell whether every recorded availability sample reports the application as accessible.
 *
 * @param appAvailability recorded samples, where {@code 0} marks a failed access
 * @return {@code false} (after logging a warning) if any sample is {@code 0};
 *     {@code true} otherwise, including for an empty list
 */
private static boolean appAlwaysAvailable(List<Integer> appAvailability) {
  // anyMatch short-circuits on the first failed sample
  final boolean outageSeen = appAvailability.stream().anyMatch(sample -> sample == 0);
  if (outageSeen) {
    logger.warning("Application was not available during operator upgrade.");
  }
  return !outageSeen;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,17 @@ public static boolean isHelmReleaseFailed(String releaseName, String namespace)
return Helm.isReleaseFailed(releaseName, namespace);
}

/**
* Check the revision of a Helm release against an expected value.
* This is a thin wrapper that delegates to {@code Helm.checkHelmReleaseRevision}.
*
* @param releaseName release name which is unique in a namespace
* @param namespace namespace in which the release is installed
* @param revision expected revision of the Helm release, as a string (for example "2")
* @return true if the release is reported at the expected revision, false otherwise
*/
public static boolean checkHelmReleaseRevision(String releaseName, String namespace, String revision) {
return Helm.checkHelmReleaseRevision(releaseName, namespace, revision);
}

/**
* Check if a pod is restarted based on podCreationTimestamp.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,24 @@ public static boolean checkHelmReleaseStatus(String releaseName, String namespac
return false;
}

/**
 * Check the revision of a Helm release against an expected value.
 *
 * <p>Runs {@code helm status <releaseName> -n <namespace>} and looks for a
 * {@code revision: <revision>} entry in its output, ignoring case. The revision must match
 * as a whole number: an expected revision of {@code 2} does not match {@code revision: 20}.
 *
 * @param releaseName release name which is unique in a namespace
 * @param namespace namespace name
 * @param revision expected revision value
 * @return true if the command succeeds and its output reports the expected revision,
 *     false if the command fails, produces no output, or reports a different revision
 */
public static boolean checkHelmReleaseRevision(String releaseName, String namespace, String revision) {
  CommandParams cmdParams = Command.defaultCommandParams()
      .command(String.format("helm status %s -n %s", releaseName, namespace))
      .saveResults(true)
      .redirect(false);

  if (!Command.withParams(cmdParams).execute()) {
    return false;
  }

  String statusOutput = cmdParams.stdout();
  if (statusOutput == null) {
    return false;
  }

  // Fully qualified names are used so that this method needs no additional import.
  // The negative lookahead rejects longer numbers that merely start with the expected
  // revision; CASE_INSENSITIVE replaces the former locale-sensitive toLowerCase() call.
  java.util.regex.Pattern revisionPattern = java.util.regex.Pattern.compile(
      "revision: " + java.util.regex.Pattern.quote(revision) + "(?!\\d)",
      java.util.regex.Pattern.CASE_INSENSITIVE);
  return revisionPattern.matcher(statusOutput).find();
}

}
Loading