Skip to content

Commit 911a2eb

Browse files
ebeigarts and B4nan authored
fix: use correct config for storage classes to avoid memory leaks (#3144)
Currently, `RequestQueue`, `Dataset`, and `KeyValueStore` always reference the global config (`Configuration.getGlobalConfig()`) instead of the config that is passed to the crawler. <img width="915" height="910" alt="Screenshot 2025-08-28 at 10 31 45" src="https://github.com/user-attachments/assets/4ba9171d-7f74-4470-b206-82ba5d36f742" /> <img width="1056" height="1283" alt="Screenshot 2025-08-29 at 13 34 22" src="https://github.com/user-attachments/assets/0b18a2a6-dde4-43a5-8fda-28144ec307c9" /> --------- Co-authored-by: Martin Adámek <banan23@gmail.com>
1 parent 2094fdd commit 911a2eb

File tree

3 files changed

+30
-8
lines changed

3 files changed

+30
-8
lines changed

packages/basic-crawler/src/internals/basic-crawler.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -976,7 +976,11 @@ export class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext
976976
this.shouldLogMaxProcessedRequestsExceeded = true;
977977
this.shouldLogMaxEnqueuedRequestsExceeded = true;
978978

979-
await purgeDefaultStorages({ onlyPurgeOnce: true });
979+
await purgeDefaultStorages({
980+
onlyPurgeOnce: true,
981+
client: this.config.getStorageClient(),
982+
config: this.config,
983+
});
980984

981985
if (requests) {
982986
await this.addRequests(requests, addRequestsOptions);

packages/core/src/storages/storage_manager.ts

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,15 @@ export class StorageManager<T extends IStorage = IStorage> {
8181
if (!storage) {
8282
client ??= this.config.getStorageClient();
8383
const storageObject = await this._getOrCreateStorage(idOrName, this.name, client);
84-
storage = new this.StorageConstructor({
85-
id: storageObject.id,
86-
name: storageObject.name,
87-
storageObject,
88-
client,
89-
});
90-
84+
storage = new this.StorageConstructor(
85+
{
86+
id: storageObject.id,
87+
name: storageObject.name,
88+
storageObject,
89+
client,
90+
},
91+
this.config,
92+
);
9193
this._addStorageToCache(storage);
9294
}
9395

test/core/crawlers/basic_crawler.test.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1796,5 +1796,21 @@ describe('BasicCrawler', () => {
17961796
expect(crawlerA.stats.state.requestsFinished).toBe(1);
17971797
expect(crawlerB.stats.state.requestsFinished).toBe(1);
17981798
});
1799+
1800+
test('Crawlers with different Configurations does not use global Configuration', async () => {
1801+
const getGlobalConfigSpy = vitest.spyOn(Configuration, 'getGlobalConfig');
1802+
1803+
const configA = new Configuration({ persistStorage: false });
1804+
const crawlerA = new BasicCrawler({ requestHandler: () => {} }, configA);
1805+
const configB = new Configuration({ persistStorage: false });
1806+
const crawlerB = new BasicCrawler({ requestHandler: () => {} }, configB);
1807+
1808+
await crawlerA.run([{ url: `http://${HOSTNAME}:${port}` }]);
1809+
await crawlerB.run([{ url: `http://${HOSTNAME}:${port}` }]);
1810+
1811+
expect(getGlobalConfigSpy.mock.calls.length).toBe(0);
1812+
expect(crawlerA.requestQueue?.config).toBe(configA);
1813+
expect(crawlerB.requestQueue?.config).toBe(configB);
1814+
});
17991815
});
18001816
});

0 commit comments

Comments
 (0)