Skip to content

Commit ef8ca62

Browse files
authored
Reduce port range re-use in tests (#85777) (#85859)
Today we select the port range to use in tests by taking the Gradle worker ID mod 223. We now use significantly more than 223 Gradle workers in each test run which means port ranges are re-used, resulting in spurious failures. This commit replaces the magic number 223 with a computation defined in terms of the available ports and the number of ports to allocate per worker. It also reduces the number of ports per worker to 30 to try and reduce the frequency of port clashes.
1 parent de4adff commit ef8ca62

File tree

2 files changed

+62
-30
lines changed

2 files changed

+62
-30
lines changed

test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java

Lines changed: 60 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,6 @@ public abstract class ESTestCase extends LuceneTestCase {
193193
private static final AtomicInteger portGenerator = new AtomicInteger();
194194

195195
private static final Collection<String> loggedLeaks = new ArrayList<>();
196-
public static final int MIN_PRIVATE_PORT = 13301;
197196

198197
private HeaderWarningAppender headerWarningAppender;
199198

@@ -1651,38 +1650,71 @@ public static boolean inFipsJvm() {
16511650
return Boolean.parseBoolean(System.getProperty(FIPS_SYSPROP));
16521651
}
16531652

1653+
/*
1654+
* [NOTE: Port ranges for tests]
1655+
*
1656+
* Some tests involve interactions over the localhost interface of the machine running the tests. The tests run concurrently in multiple
1657+
* JVMs, but all have access to the same network, so there's a risk that different tests will interact with each other in unexpected
1658+
* ways and trigger spurious failures. Gradle numbers its workers sequentially starting at 1 and each worker can determine its own
1659+
* identity from the {@link #TEST_WORKER_SYS_PROPERTY} system property. We use this to try and assign disjoint port ranges to each test
1660+
* worker, avoiding any unexpected interactions, although if we spawn enough test workers then we will wrap around to the beginning
1661+
* again.
1662+
*/
1663+
1664+
/**
1665+
* Defines the size of the port range assigned to each worker, which must be large enough to supply enough ports to run the tests, but
1666+
* not so large that we run out of ports. See also [NOTE: Port ranges for tests].
1667+
*/
1668+
private static final int PORTS_PER_WORKER = 30;
1669+
1670+
/**
1671+
* Defines the minimum port that test workers should use. See also [NOTE: Port ranges for tests].
1672+
*/
1673+
protected static final int MIN_PRIVATE_PORT = 13301;
1674+
1675+
/**
1676+
* Defines the maximum port that test workers should use. See also [NOTE: Port ranges for tests].
1677+
*/
1678+
private static final int MAX_PRIVATE_PORT = 36600;
1679+
1680+
/**
1681+
* Wrap around after reaching this worker ID.
1682+
*/
1683+
private static final int MAX_EFFECTIVE_WORKER_ID = (MAX_PRIVATE_PORT - MIN_PRIVATE_PORT - PORTS_PER_WORKER + 1) / PORTS_PER_WORKER - 1;
1684+
1685+
static {
1686+
assert getWorkerBasePort(MAX_EFFECTIVE_WORKER_ID) + PORTS_PER_WORKER - 1 <= MAX_PRIVATE_PORT;
1687+
}
1688+
16541689
/**
1655-
* Returns a unique port range for this JVM starting from the computed base port
1690+
* Returns a port range for this JVM according to its Gradle worker ID. See also [NOTE: Port ranges for tests].
16561691
*/
16571692
public static String getPortRange() {
1658-
return getBasePort() + "-" + (getBasePort() + 99); // upper bound is inclusive
1659-
}
1660-
1661-
protected static int getBasePort() {
1662-
// some tests use MockTransportService to do network based testing. Yet, we run tests in multiple JVMs that means
1663-
// concurrent tests could claim port that another JVM just released and if that test tries to simulate a disconnect it might
1664-
// be smart enough to re-connect depending on what is tested. To reduce the risk, since this is very hard to debug we use
1665-
// a different default port range per JVM unless the incoming settings override it
1666-
// use a non-default base port otherwise some cluster in this JVM might reuse a port
1667-
1668-
// We rely on Gradle implementation details here, the worker IDs are long values incremented by one for the
1669-
// lifespan of the daemon this means that they can get larger than the allowed port range.
1670-
// Ephemeral ports on Linux start at 32768 so we modulo to make sure that we don't exceed that.
1671-
// This is safe as long as we have fewer than 224 Gradle workers running in parallel
1672-
// See also: https://github.com/elastic/elasticsearch/issues/44134
1673-
final String workerIdStr = System.getProperty(ESTestCase.TEST_WORKER_SYS_PROPERTY);
1674-
final int startAt;
1693+
final var firstPort = getWorkerBasePort();
1694+
final var lastPort = firstPort + PORTS_PER_WORKER - 1; // upper bound is inclusive
1695+
assert MIN_PRIVATE_PORT <= firstPort && lastPort <= MAX_PRIVATE_PORT;
1696+
return firstPort + "-" + lastPort;
1697+
}
1698+
1699+
/**
1700+
* Returns the start of the port range for this JVM according to its Gradle worker ID. See also [NOTE: Port ranges for tests].
1701+
*/
1702+
protected static int getWorkerBasePort() {
1703+
final var workerIdStr = System.getProperty(ESTestCase.TEST_WORKER_SYS_PROPERTY);
16751704
if (workerIdStr == null) {
1676-
startAt = 0; // IDE
1677-
} else {
1678-
// we adjust the gradle worker id with mod so as to not go over the ephemoral port ranges, but gradle continually
1679-
// increases this value, so the mod can eventually become zero, thus we shift on both sides by 1
1680-
final long workerId = Long.valueOf(workerIdStr);
1681-
assert workerId >= 1 : "Non positive gradle worker id: " + workerIdStr;
1682-
startAt = Math.floorMod(workerId - 1, 223) + 1;
1705+
// running in IDE
1706+
return MIN_PRIVATE_PORT;
16831707
}
1684-
assert startAt >= 0 : "Unexpected test worker Id, resulting port range would be negative";
1685-
return MIN_PRIVATE_PORT + (startAt * 100);
1708+
1709+
final var workerId = Integer.parseInt(workerIdStr);
1710+
assert workerId >= 1 : "Non positive gradle worker id: " + workerIdStr;
1711+
return getWorkerBasePort(workerId % (MAX_EFFECTIVE_WORKER_ID + 1));
1712+
}
1713+
1714+
private static int getWorkerBasePort(int effectiveWorkerId) {
1715+
assert 0 <= effectiveWorkerId && effectiveWorkerId <= MAX_EFFECTIVE_WORKER_ID;
1716+
// the range [MIN_PRIVATE_PORT, MIN_PRIVATE_PORT+PORTS_PER_WORKER) is only for running outside of Gradle
1717+
return MIN_PRIVATE_PORT + PORTS_PER_WORKER + effectiveWorkerId * PORTS_PER_WORKER;
16861718
}
16871719

16881720
protected static InetAddress randomIp(boolean v4) {

test/framework/src/test/java/org/elasticsearch/test/test/ESTestCaseTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,12 +183,12 @@ public void testWorkerSystemProperty() {
183183
public void testBasePortGradle() {
184184
assumeTrue("requires running tests with Gradle", System.getProperty("tests.gradle") != null);
185185
// Gradle worker IDs are 1 based
186-
assertNotEquals(10300, ESTestCase.getBasePort());
186+
assertNotEquals(ESTestCase.MIN_PRIVATE_PORT, ESTestCase.getWorkerBasePort());
187187
}
188188

189189
public void testBasePortIDE() {
190190
assumeTrue("requires running tests without Gradle", System.getProperty("tests.gradle") == null);
191-
assertEquals(10300, ESTestCase.getBasePort());
191+
assertEquals(ESTestCase.MIN_PRIVATE_PORT, ESTestCase.getWorkerBasePort());
192192
}
193193

194194
public void testRandomDateFormatterPattern() {

0 commit comments

Comments
 (0)