elastic
diff --git a/‎x-pack/plugin/stateless/src/main/java/module-info.java‎
Lines changed: 8 additions & 1 deletion b/‎x-pack/plugin/stateless/src/main/java/module-info.java‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎x-pack/plugin/stateless/src/main/java/org/elasticsearch/xpack/stateless/StatelessPlugin.java‎
Lines changed: 178 additions & 0 deletions b/‎x-pack/plugin/stateless/src/main/java/org/elasticsearch/xpack/stateless/StatelessPlugin.java‎
Lines changed: 178 additions & 0 deletions
diff --git a/‎x-pack/plugin/stateless/src/main/java/org/elasticsearch/xpack/stateless/cache/Lucene90CompoundEntriesReader.java‎
Lines changed: 66 additions & 0 deletions b/‎x-pack/plugin/stateless/src/main/java/org/elasticsearch/xpack/stateless/cache/Lucene90CompoundEntriesReader.java‎
Lines changed: 66 additions & 0 deletions
diff --git a/‎x-pack/plugin/stateless/src/main/java/org/elasticsearch/xpack/stateless/commits/AbstractBatchedCompoundCommit.java‎
Lines changed: 16 additions & 0 deletions b/‎x-pack/plugin/stateless/src/main/java/org/elasticsearch/xpack/stateless/commits/AbstractBatchedCompoundCommit.java‎
Lines changed: 16 additions & 0 deletions
@@ -7,10 +7,17 @@
 
+/**
+ * Module descriptor for the stateless plugin: declares the modules it requires and the packages it exports.
+ */
 module org.elasticsearch.xpack.stateless {
     requires org.elasticsearch.base;
+    requires org.elasticsearch.blobcache;
+    requires org.elasticsearch.logging;
     requires org.elasticsearch.server;
     requires org.elasticsearch.xcore;
+    requires org.elasticsearch.xcontent;
     requires org.apache.logging.log4j;
-    requires org.elasticsearch.logging;
+    requires org.apache.lucene.core;
 
     exports org.elasticsearch.xpack.stateless;
+    exports org.elasticsearch.xpack.stateless.cache;
+    exports org.elasticsearch.xpack.stateless.commits;
+    exports org.elasticsearch.xpack.stateless.engine;
+    exports org.elasticsearch.xpack.stateless.lucene;
 }
@@ -8,8 +8,11 @@
 
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodeRole;
+import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.concurrent.EsExecutors;
+import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.license.License;
 import org.elasticsearch.license.LicensedFeature;
 import org.elasticsearch.license.XPackLicenseState;
@@ -19,6 +22,9 @@
 import org.elasticsearch.plugins.ClusterCoordinationPlugin;
 import org.elasticsearch.plugins.ExtensiblePlugin;
 import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
+import org.elasticsearch.threadpool.ExecutorBuilder;
+import org.elasticsearch.threadpool.ScalingExecutorBuilder;
 import org.elasticsearch.xpack.core.XPackPlugin;
 
 import java.io.IOException;
@@ -54,7 +60,168 @@ public class StatelessPlugin extends Plugin implements ClusterCoordinationPlugin
 
     public static final String NAME = "stateless";
 
+    // Thread pool names are defined in the BlobStoreRepository because we need to verify there that no requests are running on other pools.
+    // Each pool name is paired with a *_SETTING constant of the form "stateless.<pool name>_thread_pool".
+    public static final String SHARD_READ_THREAD_POOL = BlobStoreRepository.STATELESS_SHARD_READ_THREAD_NAME;
+    public static final String SHARD_READ_THREAD_POOL_SETTING = "stateless." + SHARD_READ_THREAD_POOL + "_thread_pool";
+    public static final String TRANSLOG_THREAD_POOL = BlobStoreRepository.STATELESS_TRANSLOG_THREAD_NAME;
+    public static final String TRANSLOG_THREAD_POOL_SETTING = "stateless." + TRANSLOG_THREAD_POOL + "_thread_pool";
+    public static final String SHARD_WRITE_THREAD_POOL = BlobStoreRepository.STATELESS_SHARD_WRITE_THREAD_NAME;
+    public static final String SHARD_WRITE_THREAD_POOL_SETTING = "stateless." + SHARD_WRITE_THREAD_POOL + "_thread_pool";
+    public static final String CLUSTER_STATE_READ_WRITE_THREAD_POOL = BlobStoreRepository.STATELESS_CLUSTER_STATE_READ_WRITE_THREAD_NAME;
+    public static final String CLUSTER_STATE_READ_WRITE_THREAD_POOL_SETTING = "stateless."
+        + CLUSTER_STATE_READ_WRITE_THREAD_POOL
+        + "_thread_pool";
+    // The two virtual batched compound commit (VBCC) pool names below are defined locally rather than taken from BlobStoreRepository.
+    public static final String GET_VIRTUAL_BATCHED_COMPOUND_COMMIT_CHUNK_THREAD_POOL = "stateless_get_vbcc_chunk";
+    public static final String GET_VIRTUAL_BATCHED_COMPOUND_COMMIT_CHUNK_THREAD_POOL_SETTING = "stateless."
+        + GET_VIRTUAL_BATCHED_COMPOUND_COMMIT_CHUNK_THREAD_POOL
+        + "_thread_pool";
+    public static final String FILL_VIRTUAL_BATCHED_COMPOUND_COMMIT_CACHE_THREAD_POOL = "stateless_fill_vbcc_cache";
+    // NOTE(review): this constant is named ..._CHUNK_THREAD_POOL_SETTING although it is built from the fill-cache pool name
+    // (FILL_VIRTUAL_BATCHED_COMPOUND_COMMIT_CACHE_THREAD_POOL); confirm whether the CHUNK/CACHE mismatch is intentional.
+    public static final String FILL_VIRTUAL_BATCHED_COMPOUND_COMMIT_CHUNK_THREAD_POOL_SETTING = "stateless."
+        + FILL_VIRTUAL_BATCHED_COMPOUND_COMMIT_CACHE_THREAD_POOL
+        + "_thread_pool";
+    public static final String PREWARM_THREAD_POOL = BlobStoreRepository.STATELESS_SHARD_PREWARMING_THREAD_NAME;
+    public static final String PREWARM_THREAD_POOL_SETTING = "stateless." + PREWARM_THREAD_POOL + "_thread_pool";
+    public static final String UPLOAD_PREWARM_THREAD_POOL = BlobStoreRepository.STATELESS_SHARD_UPLOAD_PREWARMING_THREAD_NAME;
+    public static final String UPLOAD_PREWARM_THREAD_POOL_SETTING = "stateless." + UPLOAD_PREWARM_THREAD_POOL + "_thread_pool";
+
+    /**
+     * The set of {@link ShardRouting.Role}s that we expect to see in a stateless deployment
+     */
+    public static final Set<ShardRouting.Role> STATELESS_SHARD_ROLES = Set.of(ShardRouting.Role.INDEX_ONLY, ShardRouting.Role.SEARCH_ONLY);
+
     private final boolean enabled;
+    private final boolean hasIndexRole;
+
+    /**
+     * Builds the {@link ScalingExecutorBuilder}s for the stateless thread pools, in this order: shard read, translog, shard write,
+     * cluster state read/write, get-VBCC-chunk, fill-VBCC-cache, prewarm and upload-prewarm.
+     * <p>
+     * Thread counts are either fixed or derived from the number of allocated processors, and differ depending on
+     * {@code hasIndexRole}. Every pool is created with a five minute keep-alive.
+     *
+     * @param settings     the settings from which the number of allocated processors is read
+     * @param hasIndexRole selects the sizing branch: {@code true} uses the index-role sizing, {@code false} the alternative sizing
+     * @return an array with one executor builder per stateless thread pool, in the order listed above
+     */
+    public static ExecutorBuilder<?>[] statelessExecutorBuilders(Settings settings, boolean hasIndexRole) {
+        // TODO: Consider modifying these pool counts if we change the object store client connections based on node size.
+        // Right now we have 10 threads for snapshots, 1 or 8 threads for translog and 20 or 28 threads for shard thread pools. This is to
+        // attempt to keep the threads below the default client connections limit of 50. This assumption is currently broken by the snapshot
+        // metadata pool having 50 threads. But we will continue to iterate on these numbers and limits.
+
+        final int processors = EsExecutors.allocatedProcessors(settings);
+        final int shardReadMaxThreads;
+        final int translogCoreThreads;
+        final int translogMaxThreads;
+        final int shardWriteCoreThreads;
+        final int shardWriteMaxThreads;
+        final int clusterStateReadWriteCoreThreads;
+        final int clusterStateReadWriteMaxThreads;
+        final int getVirtualBatchedCompoundCommitChunkCoreThreads;
+        final int getVirtualBatchedCompoundCommitChunkMaxThreads;
+        final int fillVirtualBatchedCompoundCommitCacheCoreThreads;
+        final int fillVirtualBatchedCompoundCommitCacheMaxThreads;
+        final int prewarmMaxThreads;
+        final int uploadPrewarmCoreThreads;
+        final int uploadPrewarmMaxThreads;
+
+        if (hasIndexRole) {
+            shardReadMaxThreads = Math.min(processors * 4, 10);
+            translogCoreThreads = 2;
+            translogMaxThreads = Math.min(processors * 2, 8);
+            shardWriteCoreThreads = 2;
+            shardWriteMaxThreads = Math.min(processors * 4, 10);
+            clusterStateReadWriteCoreThreads = 2;
+            clusterStateReadWriteMaxThreads = 4;
+            getVirtualBatchedCompoundCommitChunkCoreThreads = 1;
+            getVirtualBatchedCompoundCommitChunkMaxThreads = Math.min(processors, 4);
+            fillVirtualBatchedCompoundCommitCacheCoreThreads = 0;
+            fillVirtualBatchedCompoundCommitCacheMaxThreads = 1;
+            prewarmMaxThreads = Math.min(processors * 2, 32);
+            // These threads are used for prewarming the shared blob cache on upload, and are separate from the prewarm thread pool
+            // in order to avoid any deadlocks between the two (e.g., when two fillgaps compete). Since they are used to prewarm on upload,
+            // we use the same amount of max threads as the shard write pool.
+            // these threads use a sizeable thread-local direct buffer which might take a while to GC, so we prefer to keep some idle
+            // threads around to reduce churn and re-use the existing buffers more
+            uploadPrewarmMaxThreads = Math.min(processors * 4, 10);
+            uploadPrewarmCoreThreads = uploadPrewarmMaxThreads / 2;
+        } else {
+            // NOTE(review): in this branch the translog, shard write, cluster state, get-VBCC-chunk and upload-prewarm pools are
+            // reduced to 0 core / 1 max thread, presumably because nodes without the index role do not do that work - confirm.
+            shardReadMaxThreads = Math.min(processors * 4, 28);
+            translogCoreThreads = 0;
+            translogMaxThreads = 1;
+            shardWriteCoreThreads = 0;
+            shardWriteMaxThreads = 1;
+            clusterStateReadWriteCoreThreads = 0;
+            clusterStateReadWriteMaxThreads = 1;
+            getVirtualBatchedCompoundCommitChunkCoreThreads = 0;
+            getVirtualBatchedCompoundCommitChunkMaxThreads = 1;
+            prewarmMaxThreads = Math.min(processors * 4, 32);
+            // these threads use a sizeable thread-local direct buffer which might take a while to GC, so we prefer to keep some idle
+            // threads around to reduce churn and re-use the existing buffers more
+            fillVirtualBatchedCompoundCommitCacheCoreThreads = Math.max(processors / 2, 2);
+            fillVirtualBatchedCompoundCommitCacheMaxThreads = Math.max(processors, 2);
+            uploadPrewarmCoreThreads = 0;
+            uploadPrewarmMaxThreads = 1;
+        }
+
+        return new ExecutorBuilder<?>[] {
+            // The shard read pool always has 4 core threads and is the only pool created with a task tracking config.
+            new ScalingExecutorBuilder(
+                SHARD_READ_THREAD_POOL,
+                4,
+                shardReadMaxThreads,
+                TimeValue.timeValueMinutes(5),
+                true,
+                SHARD_READ_THREAD_POOL_SETTING,
+                EsExecutors.TaskTrackingConfig.builder().trackOngoingTasks().trackExecutionTime(0.3).build()
+            ),
+            new ScalingExecutorBuilder(
+                TRANSLOG_THREAD_POOL,
+                translogCoreThreads,
+                translogMaxThreads,
+                TimeValue.timeValueMinutes(5),
+                true,
+                TRANSLOG_THREAD_POOL_SETTING
+            ),
+            new ScalingExecutorBuilder(
+                SHARD_WRITE_THREAD_POOL,
+                shardWriteCoreThreads,
+                shardWriteMaxThreads,
+                TimeValue.timeValueMinutes(5),
+                true,
+                SHARD_WRITE_THREAD_POOL_SETTING
+            ),
+            new ScalingExecutorBuilder(
+                CLUSTER_STATE_READ_WRITE_THREAD_POOL,
+                clusterStateReadWriteCoreThreads,
+                clusterStateReadWriteMaxThreads,
+                TimeValue.timeValueMinutes(5),
+                true,
+                CLUSTER_STATE_READ_WRITE_THREAD_POOL_SETTING
+            ),
+            new ScalingExecutorBuilder(
+                GET_VIRTUAL_BATCHED_COMPOUND_COMMIT_CHUNK_THREAD_POOL,
+                getVirtualBatchedCompoundCommitChunkCoreThreads,
+                getVirtualBatchedCompoundCommitChunkMaxThreads,
+                TimeValue.timeValueMinutes(5),
+                true,
+                GET_VIRTUAL_BATCHED_COMPOUND_COMMIT_CHUNK_THREAD_POOL_SETTING
+            ),
+            new ScalingExecutorBuilder(
+                FILL_VIRTUAL_BATCHED_COMPOUND_COMMIT_CACHE_THREAD_POOL,
+                fillVirtualBatchedCompoundCommitCacheCoreThreads,
+                fillVirtualBatchedCompoundCommitCacheMaxThreads,
+                TimeValue.timeValueMinutes(5),
+                true,
+                FILL_VIRTUAL_BATCHED_COMPOUND_COMMIT_CHUNK_THREAD_POOL_SETTING
+            ),
+            new ScalingExecutorBuilder(
+                PREWARM_THREAD_POOL,
+                // these threads use a sizeable thread-local direct buffer which might take a while to GC, so we prefer to keep some idle
+                // threads around to reduce churn and re-use the existing buffers more
+                prewarmMaxThreads / 2,
+                prewarmMaxThreads,
+                TimeValue.timeValueMinutes(5),
+                true,
+                PREWARM_THREAD_POOL_SETTING
+            ),
+            new ScalingExecutorBuilder(
+                UPLOAD_PREWARM_THREAD_POOL,
+                uploadPrewarmCoreThreads,
+                uploadPrewarmMaxThreads,
+                TimeValue.timeValueMinutes(5),
+                true,
+                UPLOAD_PREWARM_THREAD_POOL_SETTING
+            ) };
+    }
 
     @Override
     public List<Setting<?>> getSettings() {
@@ -106,6 +273,7 @@ public StatelessPlugin(Settings settings) {
                 );
             }
         }
+        hasIndexRole = DiscoveryNode.hasRole(settings, DiscoveryNodeRole.INDEX_ROLE);
     }
 
     @Override
@@ -152,4 +320,14 @@ public void close() throws IOException {
     public boolean isEnabled() {
         return enabled;
     }
+
+    /**
+     * Returns the stateless executor builders when the plugin is enabled; otherwise defers to the superclass implementation.
+     *
+     * @param settings the settings passed on to {@link #statelessExecutorBuilders(Settings, boolean)}
+     * @return the executor builders for this plugin
+     */
+    @Override
+    public List<ExecutorBuilder<?>> getExecutorBuilders(Settings settings) {
+        if (enabled) {
+            // Pool sizing depends on whether this node has the index role, which was captured in the constructor.
+            return List.of(statelessExecutorBuilders(settings, hasIndexRole));
+        } else {
+            return super.getExecutorBuilders(settings);
+        }
+    }
+
 }
@@ -0,0 +1,66 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.stateless.cache;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.StringHelper;
+import org.elasticsearch.index.store.LuceneFilesExtensions;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * Reads the entry table of a compound segment entries ({@code .cfe}) file.
+ * <p>
+ * This file is mostly copied from org.apache.lucene.codecs.lucene90.Lucene90CompoundReader so that compound segment entries
+ * can be parsed in order to prewarm them.
+ * Currently, it is impossible to reuse the original class as the necessary code has private access.
+ */
+public class Lucene90CompoundEntriesReader {
+
+    // Codec name and version range that the header of the entries file is checked against.
+    static final String ENTRY_CODEC = "Lucene90CompoundEntries";
+    static final int VERSION_START = 0;
+    static final int VERSION_CURRENT = VERSION_START;
+
+    /**
+     * Opens {@code filename} in {@code directory} with {@link IOContext#READONCE} and reads its entries.
+     * The input is closed before this method returns.
+     *
+     * @param directory the directory containing the entries file
+     * @param filename  the name of the entries file; asserted to have the {@code CFE} extension
+     * @return a map from entry id to its {@link FileEntry}
+     * @throws IOException if the file cannot be opened or read, or if its content is rejected while reading
+     */
+    public static Map<String, FileEntry> readEntries(Directory directory, String filename) throws IOException {
+        assert LuceneFilesExtensions.fromFile(filename) == LuceneFilesExtensions.CFE : filename;
+        try (var input = directory.openInput(filename, IOContext.READONCE)) {
+            return Lucene90CompoundEntriesReader.readEntries(input);
+        }
+    }
+
+    /**
+     * This method skips the input validation and only lists the entries in a cfe file.
+     * Validation is going to be performed later once directory is opened for the index engine.
+     * <p>
+     * More precisely: the codec header and the (empty) index header suffix are still checked, but the ID bytes in between are
+     * skipped without being compared to anything, and nothing after the entry table is read here.
+     *
+     * @param dataInput the input positioned at the start of the entries file
+     * @return a map from entry id to its {@link FileEntry}
+     * @throws IOException if reading fails, the header check fails, or a duplicate entry id is found
+     */
+    public static Map<String, FileEntry> readEntries(DataInput dataInput) throws IOException {
+        CodecUtil.checkHeader(dataInput, ENTRY_CODEC, VERSION_START, VERSION_CURRENT);
+        // Skip over the ID stored in the index header instead of comparing it.
+        dataInput.skipBytes(StringHelper.ID_LENGTH);
+        CodecUtil.checkIndexHeaderSuffix(dataInput, "");
+        return readMapping(dataInput);
+    }
+
+    /**
+     * Reads the entry table: a vInt entry count followed, for each entry, by a string id and two longs (offset, then length).
+     *
+     * @throws CorruptIndexException if the same id appears more than once
+     */
+    private static Map<String, FileEntry> readMapping(DataInput entriesStream) throws IOException {
+        final int numEntries = entriesStream.readVInt();
+        var mapping = CollectionUtil.<String, FileEntry>newHashMap(numEntries);
+        for (int i = 0; i < numEntries; i++) {
+            final String id = entriesStream.readString();
+            final FileEntry fileEntry = new FileEntry(entriesStream.readLong(), entriesStream.readLong());
+            FileEntry previous = mapping.put(id, fileEntry);
+            if (previous != null) {
+                throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS ", entriesStream);
+            }
+        }
+        return mapping;
+    }
+
+    /**
+     * Offset and length of a single entry, in the order they are stored in the entries file.
+     * NOTE(review): presumably these locate the entry inside the compound data (.cfs) file - confirm.
+     */
+    public record FileEntry(long offset, long length) {}
+}
@@ -0,0 +1,16 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.stateless.commits;
+
+import org.elasticsearch.xpack.stateless.engine.PrimaryTermAndGeneration;
+
+/**
+ * Common view over a batched compound commit.
+ * NOTE(review): implementations are not visible here; the descriptions below are inferred from names and should be confirmed.
+ */
+public interface AbstractBatchedCompoundCommit {
+    /** Returns the {@link PrimaryTermAndGeneration} associated with this batched compound commit. */
+    PrimaryTermAndGeneration primaryTermAndGeneration();
+
+    /** Returns a {@link StatelessCompoundCommit}; by its name, presumably the most recent compound commit in the batch (confirm). */
+    StatelessCompoundCommit lastCompoundCommit();
+}