From bd251a9a19e5d802cc82deae1338bbcabb53f95f Mon Sep 17 00:00:00 2001 From: Gaurav Bafna <85113518+gbbafna@users.noreply.github.com> Date: Tue, 15 Oct 2024 16:36:22 +0530 Subject: [PATCH] Add support to dynamically resize threadpools size (#16236) (#16330) Signed-off-by: Gaurav Bafna --- CHANGELOG.md | 1 + .../cluster/settings/ClusterSettingsIT.java | 108 ++++++++++++++++++ .../common/settings/ClusterSettings.java | 5 +- .../main/java/org/opensearch/node/Node.java | 1 + .../org/opensearch/threadpool/ThreadPool.java | 101 ++++++++++++++++ .../threadpool/ThreadPoolTests.java | 44 +++++++ 6 files changed, 259 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3090d0b702f6b..64ad54c0a3bed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Tiered Caching] Segmented cache changes ([#16047](https://github.com/opensearch-project/OpenSearch/pull/16047)) - Add support for msearch API to pass search pipeline name - ([#15923](https://github.com/opensearch-project/OpenSearch/pull/15923)) - Add success and failure metrics for async shard fetch ([#15976](https://github.com/opensearch-project/OpenSearch/pull/15976)) +- Add support to dynamically resize threadpools size. ([#16236](https://github.com/opensearch-project/OpenSearch/pull/16236)) - [S3 Repository] Change default retry mechanism of s3 clients to Standard Mode ([#15978](https://github.com/opensearch-project/OpenSearch/pull/15978)) - [Workload Management] Add Integration Tests for Workload Management CRUD APIs ([#15955](https://github.com/opensearch-project/OpenSearch/pull/15955)) - Add new metric REMOTE_STORE to NodeStats API response ([#15611](https://github.com/opensearch-project/OpenSearch/pull/15611)) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/settings/ClusterSettingsIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/settings/ClusterSettingsIT.java index 708be8b33fda8..36276c2da0045 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/settings/ClusterSettingsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/settings/ClusterSettingsIT.java @@ -379,6 +379,114 @@ public void testMissingUnits() { } } + public void testThreadPoolSettings() { + // wrong threadpool + try { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put("cluster.thread_pool.wrong.max", "-1").build()) + .get(); + fail("bogus value"); + } catch (IllegalArgumentException ex) { + assertTrue(ex.getCause().getMessage().contains("illegal thread_pool name : ")); + } + + // Scaling threadpool - negative value + try { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put("cluster.thread_pool.snapshot.max", "-1").build()) + .get(); + fail("bogus value"); + } catch (IllegalArgumentException ex) { + assertEquals(ex.getCause().getMessage(), "illegal value for [cluster.thread_pool.snapshot], has to be positive value"); + } + + // Scaling threadpool - Other than max and core + try { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put("cluster.thread_pool.snapshot.wrong", "-1").build()) + .get(); + fail("bogus value"); + } catch (IllegalArgumentException ex) { + assertEquals(ex.getCause().getMessage(), "illegal thread_pool config : [wrong] should only have [core, max]"); + } + + // Scaling threadpool - core > max + try { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put("cluster.thread_pool.snapshot.core", "2").put("cluster.thread_pool.snapshot.max", "1").build() + ) + .get(); + fail("bogus value"); + } catch (IllegalArgumentException ex) { + assertEquals(ex.getCause().getMessage(), "core threadpool size cannot be greater than max"); + } + + // Scaling threadpool - Max value lesser than default value of 4 + try { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put("cluster.thread_pool.generic.max", "1").build()) + .get(); + fail("bogus value"); + } catch (IllegalArgumentException ex) { + assertEquals(ex.getCause().getMessage(), "core threadpool size cannot be greater than max"); + } + + // Scaling threadpool - happy case - transient overrides persistent + ClusterUpdateSettingsResponse clusterUpdateSettingsResponse = client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put("cluster.thread_pool.snapshot.core", "2").put("cluster.thread_pool.snapshot.max", "2").build() + ) + .setPersistentSettings(Settings.builder().put("cluster.thread_pool.snapshot.max", "1").build()) + .get(); + assertTrue(clusterUpdateSettingsResponse.isAcknowledged()); + + // Fixed threadpool - Other than size + try { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put("cluster.thread_pool.get.wrong", "-1").build()) + .get(); + fail("bogus value"); + } catch (IllegalArgumentException ex) { + assertEquals(ex.getCause().getMessage(), "illegal thread_pool config : [wrong] should only have [size]"); + } + + // Fixed threadpool - 0 value + try { + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put("cluster.thread_pool.get.size", "0").build()) + .get(); + fail("bogus value"); + } catch (IllegalArgumentException ex) { + assertEquals(ex.getCause().getMessage(), "illegal value for [cluster.thread_pool.get], has to be positive value"); + } + + // Fixed threadpool - happy case + clusterUpdateSettingsResponse = client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put("cluster.thread_pool.get.size", "1").build()) + .setPersistentSettings(Settings.builder().put("cluster.thread_pool.get.size", "1").build()) + .get(); + assertTrue(clusterUpdateSettingsResponse.isAcknowledged()); + } + public void testLoggerLevelUpdate() { assertAcked(prepareCreate("test")); diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 1fee678c2e35f..c1c6055e640f6 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -810,7 +810,10 @@ public void apply(Settings value, Settings current, Settings previous) { // Settings to be used for limiting rest requests ResponseLimitSettings.CAT_INDICES_RESPONSE_LIMIT_SETTING, ResponseLimitSettings.CAT_SHARDS_RESPONSE_LIMIT_SETTING, - ResponseLimitSettings.CAT_SEGMENTS_RESPONSE_LIMIT_SETTING + ResponseLimitSettings.CAT_SEGMENTS_RESPONSE_LIMIT_SETTING, + + // Thread pool Settings + ThreadPool.CLUSTER_THREAD_POOL_SIZE_SETTING ) ) ); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index d9a4b3208496f..e0bdd64daf5bb 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -623,6 +623,7 @@ protected Node( additionalSettingsFilter, settingsUpgraders ); + threadPool.registerClusterSettingsListeners(settingsModule.getClusterSettings()); scriptModule.registerClusterSettingsListeners(scriptService, settingsModule.getClusterSettings()); final NetworkService networkService = new NetworkService( getCustomNameResolvers(pluginsService.filterPlugins(DiscoveryPlugin.class)) diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index 81ac565d53e02..ae1711012383b 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -37,6 +37,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.SizeValue; @@ -57,11 +58,14 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.Executor; import java.util.concurrent.ExecutorService; import java.util.concurrent.RejectedExecutionException; @@ -121,6 +125,9 @@ public static class Names { public static final String REMOTE_STATE_CHECKSUM = "remote_state_checksum"; } + static Set scalingThreadPoolKeys = new HashSet<>(Arrays.asList("max", "core")); + static Set fixedThreadPoolKeys = new HashSet<>(Arrays.asList("size")); + /** * The threadpool type. * @@ -221,6 +228,12 @@ public Collection builders() { Setting.Property.NodeScope ); + public static final Setting CLUSTER_THREAD_POOL_SIZE_SETTING = Setting.groupSetting( + "cluster.thread_pool.", + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + public ThreadPool(final Settings settings, final ExecutorBuilder... customBuilders) { this(settings, null, customBuilders); } @@ -417,6 +430,94 @@ public Info info(String name) { return holder.info; } + public void registerClusterSettingsListeners(ClusterSettings clusterSettings) { + clusterSettings.addSettingsUpdateConsumer(CLUSTER_THREAD_POOL_SIZE_SETTING, this::setThreadPool, this::validateSetting); + } + + /* + Scaling threadpool can provide only max and core + Fixed/ResizableQueue can provide only size + + For example valid settings would be for scaling and fixed thead pool + cluster.threadpool.snapshot.max : "5", + cluster.threadpool.snapshot.core : "5", + cluster.threadpool.get.size : "2", + */ + private void validateSetting(Settings tpSettings) { + Map tpGroups = tpSettings.getAsGroups(); + for (Map.Entry entry : tpGroups.entrySet()) { + String tpName = entry.getKey(); + if (THREAD_POOL_TYPES.containsKey(tpName) == false) { + throw new IllegalArgumentException("illegal thread_pool name : " + tpName); + } + Settings tpGroup = entry.getValue(); + ExecutorHolder holder = executors.get(tpName); + assert holder.executor instanceof OpenSearchThreadPoolExecutor; + OpenSearchThreadPoolExecutor threadPoolExecutor = (OpenSearchThreadPoolExecutor) holder.executor; + if (holder.info.type == ThreadPoolType.SCALING) { + if (scalingThreadPoolKeys.containsAll(tpGroup.keySet()) == false) { + throw new IllegalArgumentException( + "illegal thread_pool config : " + tpGroup.keySet() + " should only have " + scalingThreadPoolKeys + ); + } + int max = tpGroup.getAsInt("max", threadPoolExecutor.getMaximumPoolSize()); + int core = tpGroup.getAsInt("core", threadPoolExecutor.getCorePoolSize()); + if (core < 1 || max < 1) { + throw new IllegalArgumentException("illegal value for [cluster.thread_pool." + tpName + "], has to be positive value"); + } else if (core > max) { + throw new IllegalArgumentException("core threadpool size cannot be greater than max"); + } + } else { + if (fixedThreadPoolKeys.containsAll(tpGroup.keySet()) == false) { + throw new IllegalArgumentException( + "illegal thread_pool config : " + tpGroup.keySet() + " should only have " + fixedThreadPoolKeys + ); + } + int size = tpGroup.getAsInt("size", threadPoolExecutor.getMaximumPoolSize()); + if (size < 1) { + throw new IllegalArgumentException("illegal value for [cluster.thread_pool." + tpName + "], has to be positive value"); + } + } + } + } + + public void setThreadPool(Settings tpSettings) { + Map tpGroups = tpSettings.getAsGroups(); + for (Map.Entry entry : tpGroups.entrySet()) { + String tpName = entry.getKey(); + Settings tpGroup = entry.getValue(); + ExecutorHolder holder = executors.get(tpName); + assert holder.executor instanceof OpenSearchThreadPoolExecutor; + OpenSearchThreadPoolExecutor executor = (OpenSearchThreadPoolExecutor) holder.executor; + if (holder.info.type == ThreadPoolType.SCALING) { + int max = tpGroup.getAsInt("max", executor.getMaximumPoolSize()); + int core = tpGroup.getAsInt("core", executor.getCorePoolSize()); + /* + If we are decreasing, core pool size has to be decreased first. + If we are increasing ,max pool size has to be increased first + This ensures that core pool is always smaller than max pool size . + Other wise IllegalArgumentException will be thrown from ThreadPoolExecutor + */ + if (core < executor.getCorePoolSize()) { + executor.setCorePoolSize(core); + executor.setMaximumPoolSize(max); + } else { + executor.setMaximumPoolSize(max); + executor.setCorePoolSize(core); + } + } else { + int size = tpGroup.getAsInt("size", executor.getMaximumPoolSize()); + if (size < executor.getCorePoolSize()) { + executor.setCorePoolSize(size); + executor.setMaximumPoolSize(size); + } else { + executor.setMaximumPoolSize(size); + executor.setCorePoolSize(size); + } + } + } + } + public ThreadPoolStats stats() { List stats = new ArrayList<>(); for (ExecutorHolder holder : executors.values()) { diff --git a/server/src/test/java/org/opensearch/threadpool/ThreadPoolTests.java b/server/src/test/java/org/opensearch/threadpool/ThreadPoolTests.java index 658de5ec49500..205bf7621c576 100644 --- a/server/src/test/java/org/opensearch/threadpool/ThreadPoolTests.java +++ b/server/src/test/java/org/opensearch/threadpool/ThreadPoolTests.java @@ -36,6 +36,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.FutureUtils; import org.opensearch.common.util.concurrent.OpenSearchExecutors; +import org.opensearch.common.util.concurrent.OpenSearchThreadPoolExecutor; import org.opensearch.test.OpenSearchTestCase; import java.util.concurrent.CountDownLatch; @@ -152,4 +153,47 @@ public void testInheritContextOnSchedule() throws InterruptedException { terminate(threadPool); } } + + public void testThreadPoolResize() { + TestThreadPool threadPool = new TestThreadPool("test"); + try { + // increase it + Settings commonSettings = Settings.builder().put("snapshot.max", "10").put("snapshot.core", "2").put("get.size", "100").build(); + threadPool.setThreadPool(commonSettings); + ExecutorService executorService = threadPool.executor("snapshot"); + OpenSearchThreadPoolExecutor executor = (OpenSearchThreadPoolExecutor) executorService; + assertEquals(10, executor.getMaximumPoolSize()); + assertEquals(2, executor.getCorePoolSize()); + + executorService = threadPool.executor("get"); + executor = (OpenSearchThreadPoolExecutor) executorService; + assertEquals(100, executor.getMaximumPoolSize()); + assertEquals(100, executor.getCorePoolSize()); + + // decrease it + commonSettings = Settings.builder().put("snapshot.max", "2").put("snapshot.core", "1").put("get.size", "90").build(); + threadPool.setThreadPool(commonSettings); + executorService = threadPool.executor("snapshot"); + executor = (OpenSearchThreadPoolExecutor) executorService; + assertEquals(2, executor.getMaximumPoolSize()); + assertEquals(1, executor.getCorePoolSize()); + + executorService = threadPool.executor("get"); + executor = (OpenSearchThreadPoolExecutor) executorService; + assertEquals(90, executor.getMaximumPoolSize()); + assertEquals(90, executor.getCorePoolSize()); + } finally { + terminate(threadPool); + } + } + + public void testThreadPoolResizeFail() { + TestThreadPool threadPool = new TestThreadPool("test"); + try { + Settings commonSettings = Settings.builder().put("snapshot.max", "50").put("snapshot.core", "100").build(); + assertThrows(IllegalArgumentException.class, () -> threadPool.setThreadPool(commonSettings)); + } finally { + terminate(threadPool); + } + } }