From 0d59a58f2525d47c0b427464ae2dd076806a41aa Mon Sep 17 00:00:00 2001 From: Kyle Farris Date: Tue, 23 Jul 2024 01:46:46 -0400 Subject: [PATCH] Recursive scanning actually works now... And should work on Windows as well. --- README.md | 19 ++++++++++++------- index.js | 40 +++++++++++++++++++++++++++++----------- tests/index.js | 33 ++++++++++++++++++--------------- 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index f23662e..4d63011 100755 --- a/README.md +++ b/README.md @@ -352,10 +352,13 @@ If you choose to supply a `fileCallback`, the scan will run a little bit slower ### NOTE -The `goodFiles` and `badFiles` parameters of the `endCallback` callback in this method will only contain the directories that were scanned in **all** **but** the following scenarios: +The `goodFiles` parameter of the `endCallback` callback in this method will only contain the directory that was scanned in **all** **but** the following scenarios: - A `fileCallback` callback is provided, and `scanRecursively` is set to _true_. - The scanner is set to `clamdscan` and `scanRecursively` is set to _false_. +- The scanned directory contains 1 or more viruses. In this case, the `goodFiles` array will be empty. + +There will, however, be a total count of the good files which is calculated by determining the total number of files scanned and subtracting the number of bad files from that count. We simply can't provide a list of all good files due to the potential large memory usage implications of scanning a directory with, for example, _millions_ of files. ### Parameters @@ -363,9 +366,10 @@ The `goodFiles` and `badFiles` parameters of the `endCallback` callback in this - `endCallback` (function) (optional) Will be called when the entire directory has been completely scanned. This callback takes 3 parameters: - `err` (object) A standard javascript Error object (null if no error) - - `goodFiles` (array) List of the full paths to all files that are _clean_. 
+ - `goodFiles` (array) An *empty* array if path is _infected_. An array containing the directory name that was passed in if _clean_. - `badFiles` (array) List of the full paths to all files that are _infected_. - `viruses` (array) List of all the viruses found (feature request: associate to the bad files). + - `numGoodFiles` (number) Number of files that were found to be clean. - `fileCallback` (function) (optional) Will be called after each file in the directory has been scanned. This is useful for keeping track of the progress of the scan. This callback takes 3 parameters: @@ -381,21 +385,22 @@ The `goodFiles` and `badFiles` parameters of the `endCallback` callback in this - `path` (string) The original `dir_path` passed into the `scanDir` method. - `isInfected` (boolean) **True**: File is infected; **False**: File is clean. **NULL**: Unable to scan. - - `goodFiles` (array) List of the full paths to all files that are _clean_. + - `goodFiles` (array) An *empty* array if path is _infected_. An array containing the directory name that was passed in if _clean_. - `badFiles` (array) List of the full paths to all files that are _infected_. - `viruses` (array) List of all the viruses found (feature request: associate to the bad files). + - `numGoodFiles` (number) Number of files that were found to be clean. ### Callback Example ```javascript -clamscan.scanDir('/some/path/to/scan', (err, goodFiles, badFiles, viruses) { +clamscan.scanDir('/some/path/to/scan', (err, goodFiles, badFiles, viruses, numGoodFiles) { if (err) return console.error(err); if (badFiles.length > 0) { console.log(`${path} was infected. The offending files (${badFiles.join (', ')}) have been quarantined.`); console.log(`Viruses Found: ${viruses.join(', ')}`); } else { - console.log("Everything looks good! No problems here!."); + console.log(`${goodFiles[0]} looks good! 
${numGoodFiles} file(s) scanned and no problems found!`); } }); ``` @@ -404,7 +409,7 @@ clamscan.scanDir('/some/path/to/scan', (err, goodFiles, badFiles, viruses) { ```javascript clamscan.scanDir('/some/path/to/scan').then(results => { - const { path, isInfected, goodFiles, badFiles, viruses } = results; + const { path, isInfected, goodFiles, badFiles, viruses, numGoodFiles } = results; //... }).catch(err => { return console.error(err); @@ -414,7 +419,7 @@ clamscan.scanDir('/some/path/to/scan').then(results => { ### Async/Await Example ```javascript -const { path, isInfected, goodFiles, badFiles, viruses } = await clamscan.scanDir('/some/path/to/scan'); +const { path, isInfected, goodFiles, badFiles, viruses, numGoodFiles } = await clamscan.scanDir('/some/path/to/scan'); ``` diff --git a/index.js b/index.js index feb55a7..c18f606 100755 --- a/index.js +++ b/index.js @@ -789,11 +789,11 @@ class NodeClam { } // Parse out the name of the virus(es) found... - const viruses = result + const viruses = Array.from(new Set(result // eslint-disable-next-line no-control-regex .split(/(\u0000|[\r\n])/) .map((v) => (/:\s+(.+)FOUND$/gm.test(v) ? v.replace(/(.+:\s+)(.+)FOUND/gm, '$2').trim() : null)) - .filter((v) => !!v); + .filter((v) => !!v))); return { isInfected: true, viruses, file, resultString: result, timeout }; } @@ -1907,11 +1907,11 @@ class NodeClam { * @returns {Promise} Object like: `{ path: String, isInfected: Boolean, goodFiles: Array, badFiles: Array, viruses: Array }` * @example * // Callback Method - * clamscan.scanDir('/some/path/to/scan', (err, goodFiles, badFiles, viruses) { + * clamscan.scanDir('/some/path/to/scan', (err, goodFiles, badFiles, viruses, numGoodFiles) => { * if (err) return console.error(err); * * if (badFiles.length > 0) { - * console.log(`${path} was infected. 
The offending files (${badFiles.map(v => `${v.file} (${v.virus})`).join (', ')}) have been quarantined.`); * console.log(`Viruses Found: ${viruses.join(', ')}`); * } else { * console.log('Everything looks good! No problems here!.'); @@ -2034,7 +2034,6 @@ class NodeClam { else if (typeof fileCb !== 'function' || !hasCb) { // Scan locally via socket (either TCP or Unix socket) // This is much simpler/faster process--potentially even more with MULTISCAN enabled) - if ( this.settings.clamdscan.socket || (this.settings.clamdscan.port && (!this.settings.clamdscan.host || this._isLocalHost())) @@ -2048,10 +2047,10 @@ class NodeClam { if (this.settings.clamdscan.multiscan === true) { // Use Multiple threads (faster) - client.write(`MULTISCAN ${path} `); + client.write(`MULTISCAN ${path}`); } else { // Use single or default # of threads (potentially slower) - client.write(`SCAN ${path} `); + client.write(`CONTSCAN ${path}`); } // Where to buffer string response (not a real "Buffer", per se...) @@ -2068,8 +2067,8 @@ class NodeClam { .on('end', async () => { if (this.settings.debugMode) console.log(`${this.debugLabel}: Received response from remote clamd service.`); - const response = Buffer.concat(chunks); + const response = Buffer.concat(chunks); const result = this._processResult(response.toString(), path); if (result instanceof Error) { // Fallback to local if that's an option @@ -2084,12 +2083,31 @@ class NodeClam { // Fully close the client client.end(); + if (this.settings.debugMode) console.log(`${this.debugLabel}: Results: `, result); const { isInfected, viruses } = result; + + // If the path is infected, build out list of infected files + let badFiles = []; + if (isInfected) { + badFiles = Array.from(new Set(result.resultString.split(os.EOL).map(v => { + const [file, virus] = v.replace(/ FOUND$/, '').split(': '); + return { file, virus }; + }))); + } + + // Having a list of good files could use up all available memory if a big enough + // directory is scanned. 
Just return the scanned path if all files are good. const goodFiles = isInfected ? [] : [path]; - const badFiles = isInfected ? [path] : []; + + // Get a count of all the good files since that should be easy enough... + const numGoodFiles = (await getFiles(path)).length - badFiles.length; + + if (this.settings.debugMode) console.log(`${this.debugLabel}: Bad Files: `, badFiles); + if (this.settings.debugMode) console.log(`${this.debugLabel}: # Good Files: `, numGoodFiles); + return hasCb - ? endCb(null, goodFiles, badFiles, viruses) - : resolve({ path, isInfected, goodFiles, badFiles, viruses }); + ? endCb(null, goodFiles, badFiles, viruses, numGoodFiles) + : resolve({ path, isInfected, goodFiles, badFiles, viruses, numGoodFiles }); }); } catch (e) { const err = new NodeClamError( diff --git a/tests/index.js b/tests/index.js index 88ba8d7..c56bedb 100755 --- a/tests/index.js +++ b/tests/index.js @@ -1255,7 +1255,7 @@ describe('scanDir', () => { }); it('should supply badFiles array with scanned path when directory has infected files', (done) => { - clamscan.settings.scanRecursively = true; + // clamscan.settings.scanRecursively = true; eicarGen.writeFile(); clamscan.scanDir(badScanDir, (err, goodFiles, badFiles) => { // if (err) console.error(err); @@ -1290,34 +1290,37 @@ describe('scanDir', () => { }); it('should reply with all the good files, bad files, and viruses from a multi-level directory with some good files and some bad files', (done) => { - clamscan.settings.scanRecursively = false; eicarGen.writeMixed(); - clamscan.scanDir(mixedScanDir, (err, goodFiles, badFiles, viruses) => { + clamscan.settings.scanRecursively = true; + // clamscan.settings.debugMode = true; + + clamscan.scanDir(mixedScanDir, (err, goodFiles, badFiles, viruses, numGoodFiles) => { check(done, () => { const ignoreFiles = ['.DS_Store'].map((v) => `${mixedScanDir}/${v}`); goodFiles = goodFiles.filter((v) => !ignoreFiles.includes(v)); - console.log('Good Files: ', mixedScanDir, goodFiles); + 
// console.log('Good Files: ', mixedScanDir, goodFiles); + // console.log('Bad Files: ', mixedScanDir, badFiles); expect(err, 'scanDir should not return error').to.not.be.instanceof(Error); + const validBadFiles = [ + `${mixedScanDir}/folder1/bad_file_1.txt`, + `${mixedScanDir}/folder2/bad_file_2.txt`, + ]; + expect(badFiles, 'bad files should be array').to.be.an('array'); expect(badFiles, 'bad files should have 2 items').to.have.length(2); - expect(badFiles, 'bad files should include bad_file_1.txt').to.include( - `${mixedScanDir}/folder1/bad_file_1.txt` + expect(validBadFiles, 'bad files should include bad_file_1.txt').to.include( + badFiles[0].file ); - expect(badFiles, 'bad files should include bad_file_2.txt').to.include( - `${mixedScanDir}/folder2/bad_file_2.txt` + expect(validBadFiles, 'bad files should include bad_file_2.txt').to.include( + badFiles[1].file ); expect(goodFiles, 'good files should be array').to.be.an('array'); - expect(goodFiles, 'good files should have 3 items').to.have.length(2); - expect(goodFiles, 'good files include good_file_1.txt').to.include( - `${mixedScanDir}/folder1/good_file_1.txt` - ); - expect(goodFiles, 'good files should include good_file_2.txt').to.include( - `${mixedScanDir}/folder2/good_file_2.txt` - ); + expect(goodFiles, 'good files should be empty').to.have.length(0); + expect(numGoodFiles, 'num good files should be 2').to.be.eql(2); expect(viruses, 'viruses should not be empty').to.not.be.empty; expect(viruses, 'viruses should be array').to.be.an('array');