Description
Prerequisites
- I have written a descriptive issue title
- I have searched existing issues to ensure the performance issue has not already been reported
Last performant version
unknown
Slowed down in version
8.4.3
Node.js version
20.9.0
🦥 Performance issue
We have a high throughput application where our performance is limited by the number of db writes. During our testing we found mongoose's insertMany to be extremely slow compared to the insertMany provided by the mongodb package.
We were able to achieve a maximum throughput of 1900 documents written per second on a single unindexed collection by using batches of 50 documents per insertMany, with 1000 concurrent insertMany calls and minPoolSize set to 1000.
With the mongodb package, we were able to achieve 58,000 writes per second by using a similar concurrency of 1000 and 200 documents per insertMany call. This means that for this part of our application we have to bypass mongoose and use the underlying mongodb api.
Using Collection.create, we were able to achieve only a maximum of 650 documents inserted per second.
Steps to Reproduce
This is the code I was using for testing. You'd have to replace the uri with your uri obviously.
With the same settings, mongoose performs worse. If we recreate and use our production collection (with 50+ fields of varying types) we get significantly lower throughput for mongoose, but we're still able to get higher throughput for native insertMany calls. Additionally, if I set batchSize too high for mongoose, I get an out-of-memory error, which may indicate something about what's causing the performance issue (or not?). I got the error by using await benchmarkBulkInsert(writeMongoose, 1000, 1000);
// Benchmark setup: load env, define the mongoose model used for the
// "mongoose path", and declare the handle used for the "native driver path".
const dotenv = require('dotenv');
const mongoose = require('mongoose');

// Environment must be loaded before reading DB_URI below.
dotenv.config({
path: '.env.mongo.benchmark',
});

const uri = process.env.DB_URI;
const name = 'benchmark';

// Minimal single-field schema; keep in sync with createDocument().
const benchmarkSchema = new mongoose.Schema({
foo: String,
});
const Collection = mongoose.model('Collection', benchmarkSchema);

// Populated in connectExtra() with the raw driver collection.
let NativeCollection;
/**
 * Connect mongoose with a large pool and capture the underlying native
 * driver collection for the "native path" benchmark.
 *
 * @param {string} [dbUri=uri]  connection string (defaults to module-level uri)
 * @param {string} [dbName=name] database name (defaults to module-level name)
 * @returns {Promise<mongoose.Connection>} the active mongoose connection
 *
 * Fixes vs. original:
 *  - the 'error' listener is registered BEFORE awaiting connect, so
 *    connection-time errors are not emitted into the void;
 *  - the native db name is no longer hard-coded to 'benchmark' — it uses
 *    the same dbName passed to mongoose.connect.
 */
async function connectExtra(dbUri = uri, dbName = name) {
mongoose.connection.on('error', (dbError) => {
throw new Error(`Couldn't connect to the database:${dbError}`);
});
await mongoose.connect(dbUri, {
retryWrites: true,
dbName: dbName,
minPoolSize: 1000,
maxPoolSize: 1000
});
console.log(`[CONNECTED TO DB]: ${dbName}`);
// Grab the raw driver client so the native benchmark bypasses mongoose.
const client = mongoose.connection.getClient();
const db = client.db(dbName);
NativeCollection = db.collection('collections');
return mongoose.connection;
}
// Insert one batch through the raw MongoDB driver collection.
// forceServerObjectId lets the server assign _id values, skipping
// client-side ObjectId generation.
async function writeNative(documents) {
const options = { forceServerObjectId: true };
await NativeCollection.insertMany(documents, options);
}
// Insert one batch through mongoose's Model.insertMany (the slow path
// under investigation in this issue).
async function writeMongoose(documents) {
await Collection.insertMany(documents);
}
// Thin wrapper around connectExtra. NOTE(review): the original
// connectExtra declares no parameters, so these arguments are ignored
// there and the module-level uri/name are used instead.
async function connectToDB() {
return connectExtra(uri, name);
}
/**
 * Build one synthetic document for the benchmark.
 * Shape must stay in sync with the mongoose schema (a single `foo` field).
 * @returns {{foo: string}} a fresh object on every call
 */
function createDocument() {
const doc = { foo: 'bar' };
return doc;
}
// Benchmark a bulk-insert function for ~1 second and report throughput.
//
// writeFunction     - async fn taking an array of documents (writeMongoose
//                     or writeNative above)
// concurrencyLevel  - max number of in-flight insertMany calls before the
//                     loop drains them
// batchSize         - number of documents passed to each writeFunction call
//
// NOTE(review): the SAME `documents` array instance is reused for every
// call — writeFunction implementations that mutate the array/objects
// (e.g. by assigning _id) will see accumulated mutations across calls.
async function benchmarkBulkInsert(writeFunction, concurrencyLevel, batchSize) {
let totalInserted = 0;
const documents = [];
// Build one shared batch of synthetic documents up front.
for (let i = 0; i < batchSize; i++) {
documents.push(createDocument());
}
const preDocumentCount = await Collection.countDocuments({});
console.log(`Starting with ${preDocumentCount} documents in the database.`);
// One insert task; failures are logged and counted as 0 inserted rather
// than aborting the whole benchmark run.
const insertManyTask = async () => {
try {
await writeFunction(documents);
return batchSize; // Return the number of inserted documents
} catch (error) {
console.error('Error during batch insert:', error);
return 0; // Return zero if the insert fails
}
};
const tasks = [];
const endTime = Date.now() + 1000; // Run for 1 second
const startTime = new Date();
// Batch-style concurrency: tasks are launched without awaiting until the
// limit is reached, then ALL in-flight tasks are drained together before
// launching more (not a sliding window that refills as each one settles).
while (Date.now() < endTime) {
if (tasks.length < concurrencyLevel) {
tasks.push(insertManyTask());
} else {
const results = await Promise.all(tasks);
totalInserted += results.reduce((acc, val) => acc + val, 0); // Sum up results
tasks.length = 0; // Clear the array after all promises have resolved
}
}
// Drain any tasks still in flight when the 1-second window closed.
if (tasks.length > 0) {
const results = await Promise.all(tasks);
totalInserted += results.reduce((acc, val) => acc + val, 0); // Sum up remaining results
}
const endedAt = new Date();
// Runtime includes the final drain above, so it can exceed 1 second;
// the docs-per-second figure below uses this measured duration.
const runTime = (endedAt.getTime() - startTime.getTime()) / 1000
console.log('Ran in ', runTime);
const postDocumentCount = await Collection.countDocuments({});
console.log(`Ending with ${postDocumentCount} documents in the database.`);
// NOTE(review): the count delta assumes no other writers touch this
// collection during the run; otherwise it diverges from totalInserted.
console.log(`Inserted ${postDocumentCount - preDocumentCount} documents`);
console.log(`Inserted ${totalInserted} documents`);
console.log(`${(postDocumentCount - preDocumentCount) / runTime} Docs per Second`)
}
/**
 * Entry point: connect, run both benchmarks, then shut down.
 *
 * Fixes vs. original:
 *  - the top-level call no longer leaves a floating promise — rejections
 *    are caught, logged, and exit with a non-zero status;
 *  - the connection pool is closed before exiting instead of relying on
 *    process.exit(0) to tear it down.
 */
async function test() {
await connectToDB();
// Call w/ writeFunc, concurrency, batchSize
// I get out of memory errors if I change batchSize to be too high for writeMongoose
await benchmarkBulkInsert(writeMongoose, 1000, 50);
await benchmarkBulkInsert(writeNative, 1000, 2000);
await mongoose.disconnect();
process.exit(0);
}
test().catch((err) => {
console.error(err);
process.exit(1);
});
Expected Behavior
I would expect the throughput of mongoose's insertMany to exactly match the throughput of the native mongodb insertMany.