From 34d734812b59454fb406ed4d8864752066ff2497 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Jan 2023 14:28:10 +0000 Subject: [PATCH 01/22] Bump luxon from 2.3.0 to 2.5.2 Bumps [luxon](https://github.com/moment/luxon) from 2.3.0 to 2.5.2. - [Release notes](https://github.com/moment/luxon/releases) - [Changelog](https://github.com/moment/luxon/blob/master/CHANGELOG.md) - [Commits](https://github.com/moment/luxon/compare/2.3.0...2.5.2) --- updated-dependencies: - dependency-name: luxon dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- package-lock.json | 14 +++++++------- package.json | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index f8585d9e..04897534 100644 --- a/package-lock.json +++ b/package-lock.json @@ -35,7 +35,7 @@ "js-yaml": "^3.13.1", "linebyline": "^1.3.0", "lodash": "^4.17.21", - "luxon": "^2.1.1", + "luxon": "^2.5.2", "lzma-native": "^8.0.1", "memory-cache": "^0.2.0", "mkdirp": "^0.5.1", @@ -5282,9 +5282,9 @@ "integrity": "sha1-81ypHEk/e3PaDgdJUwTxezH4fuU=" }, "node_modules/luxon": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/luxon/-/luxon-2.3.0.tgz", - "integrity": "sha512-gv6jZCV+gGIrVKhO90yrsn8qXPKD8HYZJtrUDSfEbow8Tkw84T9OnCyJhWvnJIaIF/tBuiAjZuQHUt1LddX2mg==", + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/luxon/-/luxon-2.5.2.tgz", + "integrity": "sha512-Yg7/RDp4nedqmLgyH0LwgGRvMEKVzKbUdkBYyCosbHgJ+kaOUx0qzSiSatVc3DFygnirTPYnMM2P5dg2uH1WvA==", "engines": { "node": ">=12" } @@ -13190,9 +13190,9 @@ "integrity": "sha1-81ypHEk/e3PaDgdJUwTxezH4fuU=" }, "luxon": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/luxon/-/luxon-2.3.0.tgz", - "integrity": "sha512-gv6jZCV+gGIrVKhO90yrsn8qXPKD8HYZJtrUDSfEbow8Tkw84T9OnCyJhWvnJIaIF/tBuiAjZuQHUt1LddX2mg==" + "version": "2.5.2", + "resolved": "https://registry.npmjs.org/luxon/-/luxon-2.5.2.tgz", + "integrity": "sha512-Yg7/RDp4nedqmLgyH0LwgGRvMEKVzKbUdkBYyCosbHgJ+kaOUx0qzSiSatVc3DFygnirTPYnMM2P5dg2uH1WvA==" }, "lzma-native": { "version": "8.0.1", diff --git a/package.json b/package.json index 57bb81a8..c877edc6 100644 --- a/package.json +++ b/package.json @@ -53,7 +53,7 @@ "js-yaml": "^3.13.1", "linebyline": "^1.3.0", "lodash": "^4.17.21", - "luxon": "^2.1.1", + "luxon": "^2.5.2", "lzma-native": "^8.0.1", "memory-cache": "^0.2.0", "mkdirp": "^0.5.1", From 1c1b653c8652a9dc8e383a1f5a9fe2e65d5c8b67 Mon Sep 17 00:00:00 2001 From: Qing Tomlinson Date: Wed, 11 Oct 2023 14:42:17 -0700 Subject: [PATCH 02/22] Update readme --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 1c8fea58..dc5d642d 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,18 @@ Process the source, if any: The crawler's output is stored for use by the rest of the ClearlyDefined infrastructure -- it is not intended to be used directly by humans. Note that each tool's output is stored separately and the results of processing the component and the component source are also separated. +### More on `type`... +The `type` in the request object typically corresponds to a internal processor in CD. +1. `component` is the most generic type. Internally, it is converted to a `package` or `source` request by the component processor. +2. `package` request is processed by the package processor and is further converted to a request with a specific type (crate, deb, gem, go, maven, npm, nuget, composer, pod, pypi). For a `package` typed request, if the mentioned specific binary package type is known, the specific type (e.g. `npm`) can be used (instead of `package`) in the harvest request and skip the conversion step. For example, +```json +{ + "type": "npm", + "url": "cd:/npm/npmjs/-/redie/0.3.0" +} +``` +3. `source` requests are processed by the source processor, which subsequently dispatches a `clearlydefined` typed request for the supported source types and other requests (one for each scanning tool). These are the more advanced scenarios where the request type and the coordinate type differ. + # Configuration The crawler is quite configuable. Out of the box it is setup for demo-level use directly on your computer. In its full glory it can run with arbitrarily many distributed clients using an array of different queuing, caching and storage technologies. From 49153343afc385fe8d5488924d62c846564dfeb3 Mon Sep 17 00:00:00 2001 From: Qing Tomlinson Date: Thu, 12 Oct 2023 10:08:59 -0700 Subject: [PATCH 03/22] Include review comments --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index dc5d642d..6ee6dc4c 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ Here are a few example request objects. } ``` -The request `type` describes the crawling activity being requested. For example, "do `package` crawling". It is typically the same as the `type` in the url (see below). There are some more advanced scenarios where the two values are different but for starters, treat them as the same. The general form of a request URL is (note: it is a URL because of the underlying crawling infrastructure, the `cd` scheme is not particularly relevant) +The request `type` describes the crawling activity being requested. For example, "do `package` crawling" (see [More on type](#more-on-type) for a description of valid type values). It is typically the same as the `type` in the url (see segments description below). There are some more advanced scenarios where the two values are different but for starters, treat them as the same. The general form of a request URL is (note: it is a URL because of the underlying crawling infrastructure, the `cd` scheme is not particularly relevant) ``` cd:/type/provider/namespace/name/revision @@ -80,10 +80,10 @@ Process the source, if any: The crawler's output is stored for use by the rest of the ClearlyDefined infrastructure -- it is not intended to be used directly by humans. Note that each tool's output is stored separately and the results of processing the component and the component source are also separated. -### More on `type`... -The `type` in the request object typically corresponds to a internal processor in CD. +### More on `type` +The `type` in the request object typically corresponds to an internal processor in CD. 1. `component` is the most generic type. Internally, it is converted to a `package` or `source` request by the component processor. -2. `package` request is processed by the package processor and is further converted to a request with a specific type (crate, deb, gem, go, maven, npm, nuget, composer, pod, pypi). For a `package` typed request, if the mentioned specific binary package type is known, the specific type (e.g. `npm`) can be used (instead of `package`) in the harvest request and skip the conversion step. For example, +2. `package` request is processed by the package processor and is further converted to a request with a specific type (`crate`, `deb`, `gem`, `go`, `maven`, `npm`, `nuget`, `composer`, `pod`, `pypi`). For a `package` typed request, if the mentioned specific binary package type is known, the specific type (e.g. `npm`) can be used (instead of `package`) in the harvest request and skip the conversion step. For example, ```json { "type": "npm", From 6eead811e194b601748f45fae5a783bccb8ce4d8 Mon Sep 17 00:00:00 2001 From: Qing Tomlinson Date: Thu, 28 Sep 2023 15:50:41 -0700 Subject: [PATCH 04/22] Exclude .git directory content for package file count --- .../process/abstractClearlyDefinedProcessor.js | 2 +- providers/process/abstractProcessor.js | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/providers/process/abstractClearlyDefinedProcessor.js b/providers/process/abstractClearlyDefinedProcessor.js index b1c5176e..6eabc9f1 100644 --- a/providers/process/abstractClearlyDefinedProcessor.js +++ b/providers/process/abstractClearlyDefinedProcessor.js @@ -75,7 +75,7 @@ class AbstractClearlyDefinedProcessor extends AbstractProcessor { let count = 0 const bytes = await du(location, { filter: file => { - if (path.basename(file) === '.git') return false + if (!this.isValidExcludingGit(file)) return false count++ return true } diff --git a/providers/process/abstractProcessor.js b/providers/process/abstractProcessor.js index e371561e..bc4c3c6f 100644 --- a/providers/process/abstractProcessor.js +++ b/providers/process/abstractProcessor.js @@ -150,15 +150,21 @@ class AbstractProcessor extends BaseHandler { */ async filterFiles(location) { const fullList = await this.getFiles(location) - const exclusions = ['.git'] - const filteredList = fullList.filter(file => { - if (!file) return false - const segments = file.split(/[\\/]/g) - return !intersection(segments, exclusions).length - }) + const filteredList = fullList.filter(file => this.isValidExcludingGit(file)) return trimAllParents(filteredList, location).filter(x => x) } + _isValid(file, exclusions) { + if (!file) return false + const segments = file.split(/[\\/]/g) + return !intersection(segments, exclusions).length + } + + isValidExcludingGit(file) { + const exclusions = ['.git'] + return this._isValid(file, exclusions) + } + shouldFetch() { return true } From edd0267d0d83da7865fa9c6ba5013a2bbbb829ee Mon Sep 17 00:00:00 2001 From: Qing Tomlinson Date: Thu, 2 Nov 2023 16:44:10 -0700 Subject: [PATCH 05/22] update @clearlydefined/spdx to to 0.1.7 Task: https://github.com/clearlydefined/crawler/issues/528 --- package-lock.json | 18 +++++++++--------- package.json | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/package-lock.json b/package-lock.json index 22099062..af67448b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,7 +10,7 @@ "hasInstallScript": true, "license": "MIT", "dependencies": { - "@clearlydefined/spdx": "^0.1.6", + "@clearlydefined/spdx": "^0.1.7", "@microsoft/refreshing-config": "^0.1.3", "applicationinsights": "^1.5.0", "ar-async": "^0.1.4", @@ -395,13 +395,13 @@ } }, "node_modules/@clearlydefined/spdx": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/@clearlydefined/spdx/-/spdx-0.1.6.tgz", - "integrity": "sha512-W+lPyDE8B9PXR1IGlhutacbhb+pjvc5q3ytSiXVeLssL1TDAM6uKDG5q9xJ8bvz5wU0WHY1k32y/8isB5p7rhA==", + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/@clearlydefined/spdx/-/spdx-0.1.7.tgz", + "integrity": "sha512-omlCkOl6FvJV3NbrReZdqhmXa/Ls3D4vvgSBNsTZpeQzcksKqMLof5vcN3j3jSXwePDrLTJKOj6818rN/WeY8Q==", "dependencies": { "spdx-expression-parse": "github:clearlydefined/spdx-expression-parse.js#fork", - "spdx-license-ids": "^3.0.7", - "spdx-license-list": "^6.4.0", + "spdx-license-ids": "^3.0.13", + "spdx-license-list": "^6.6.0", "spdx-satisfies": "github:clearlydefined/spdx-satisfies.js#parse-override" } }, @@ -7753,9 +7753,9 @@ } }, "node_modules/spdx-license-ids": { - "version": "3.0.7", - "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.7.tgz", - "integrity": "sha512-U+MTEOO0AiDzxwFvoa4JVnMV6mZlJKk2sBLt90s7G0Gd0Mlknc7kxEn3nuDPNZRta7O2uy8oLcZLVT+4sqNZHQ==" + "version": "3.0.16", + "resolved": "https://registry.npmjs.org/spdx-license-ids/-/spdx-license-ids-3.0.16.tgz", + "integrity": "sha512-eWN+LnM3GR6gPu35WxNgbGl8rmY1AEmoMDvL/QD6zYmPWgywxWqJWNdLGT+ke8dKNWrcYgYjPpG5gbTfghP8rw==" }, "node_modules/spdx-license-list": { "version": "6.6.0", diff --git a/package.json b/package.json index 3b5ab0ad..b794db00 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,7 @@ "url": "https://github.com/clearlydefined/crawler" }, "dependencies": { - "@clearlydefined/spdx": "^0.1.6", + "@clearlydefined/spdx": "^0.1.7", "@microsoft/refreshing-config": "^0.1.3", "applicationinsights": "^1.5.0", "ar-async": "^0.1.4", From 33af2af9f0dc000fd85d48156bdb8235878fa2bf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 22:59:07 +0000 Subject: [PATCH 06/22] Bump xml2js from 0.4.23 to 0.5.0 Bumps [xml2js](https://github.com/Leonidas-from-XIV/node-xml2js) from 0.4.23 to 0.5.0. - [Commits](https://github.com/Leonidas-from-XIV/node-xml2js/commits/0.5.0) --- updated-dependencies: - dependency-name: xml2js dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- package-lock.json | 8 ++++---- package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index ede9772a..2d358c6c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -56,7 +56,7 @@ "unbzip2-stream": "^1.3.3", "winston": "^2.3.0", "winston-azure-application-insights": "^1.5.0", - "xml2js": "^0.4.22" + "xml2js": "^0.5.0" }, "devDependencies": { "chai": "^4.2.0", @@ -8702,9 +8702,9 @@ } }, "node_modules/xml2js": { - "version": "0.4.23", - "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.4.23.tgz", - "integrity": "sha512-ySPiMjM0+pLDftHgXY4By0uswI3SPKLDw/i3UXbnO8M/p28zqexCUoPmQFrYD+/1BzhGJSs2i1ERWKJAtiLrug==", + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/xml2js/-/xml2js-0.5.0.tgz", + "integrity": "sha512-drPFnkQJik/O+uPKpqSgr22mpuFHqKdbS835iAQrUC73L2F5WkboIRd63ai/2Yg6I1jzifPFKH2NTK+cfglkIA==", "dependencies": { "sax": ">=0.6.0", "xmlbuilder": "~11.0.0" diff --git a/package.json b/package.json index b1c07af5..d711bb50 100644 --- a/package.json +++ b/package.json @@ -74,7 +74,7 @@ "unbzip2-stream": "^1.3.3", "winston": "^2.3.0", "winston-azure-application-insights": "^1.5.0", - "xml2js": "^0.4.22" + "xml2js": "^0.5.0" }, "devDependencies": { "chai": "^4.2.0", From 9b1a2e80b3cd9d2bc9f5875bcb56860efb0a98d4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 22:59:09 +0000 Subject: [PATCH 07/22] Bump axios from 0.27.2 to 1.6.0 Bumps [axios](https://github.com/axios/axios) from 0.27.2 to 1.6.0. - [Release notes](https://github.com/axios/axios/releases) - [Changelog](https://github.com/axios/axios/blob/v1.x/CHANGELOG.md) - [Commits](https://github.com/axios/axios/compare/v0.27.2...v1.6.0) --- updated-dependencies: - dependency-name: axios dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- package-lock.json | 18 ++++++++++++------ package.json | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/package-lock.json b/package-lock.json index ede9772a..13bd3457 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,7 +15,7 @@ "applicationinsights": "^1.5.0", "ar-async": "^0.1.4", "async": "^3.1.0", - "axios": "^0.27.2", + "axios": "^1.6.0", "axios-retry": "^3.2.5", "azure-storage": "^2.10.3", "body-parser": "^1.19.0", @@ -977,12 +977,13 @@ "integrity": "sha512-ReZxvNHIOv88FlT7rxcXIIC0fPt4KZqZbOlivyWtXLt8ESx84zd3kMC6iK5jVeS2qt+g7ftS7ye4fi06X5rtRQ==" }, "node_modules/axios": { - "version": "0.27.2", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.27.2.tgz", - "integrity": "sha512-t+yRIyySRTp/wua5xEr+z1q60QmLq8ABsS5O9Me1AsE5dfKqgnCFzwiCZZ/cGNd1lq4/7akDWMxdhVlucjmnOQ==", + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.0.tgz", + "integrity": "sha512-EZ1DYihju9pwVB+jg67ogm+Tmqc6JmhamRN6I4Zt8DfZu5lbcQGw3ozH9lFejSJgs/ibaef3A9PMXPLeefFGJg==", "dependencies": { - "follow-redirects": "^1.14.9", - "form-data": "^4.0.0" + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" } }, "node_modules/axios-retry": { @@ -6865,6 +6866,11 @@ "node": ">= 0.10" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "node_modules/proxyquire": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/proxyquire/-/proxyquire-2.1.3.tgz", diff --git a/package.json b/package.json index b1c07af5..9d64841a 100644 --- a/package.json +++ b/package.json @@ -33,7 +33,7 @@ "applicationinsights": "^1.5.0", "ar-async": "^0.1.4", "async": "^3.1.0", - "axios": "^0.27.2", + "axios": "^1.6.0", "axios-retry": "^3.2.5", "azure-storage": "^2.10.3", "body-parser": "^1.19.0", From 07369aa59b5a36349b23d5221572bcc40da1686e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 22:59:21 +0000 Subject: [PATCH 08/22] Bump follow-redirects from 1.15.1 to 1.15.5 Bumps [follow-redirects](https://github.com/follow-redirects/follow-redirects) from 1.15.1 to 1.15.5. - [Release notes](https://github.com/follow-redirects/follow-redirects/releases) - [Commits](https://github.com/follow-redirects/follow-redirects/compare/v1.15.1...v1.15.5) --- updated-dependencies: - dependency-name: follow-redirects dependency-type: indirect ... Signed-off-by: dependabot[bot] --- package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index ede9772a..c9d07bfd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3492,9 +3492,9 @@ "dev": true }, "node_modules/follow-redirects": { - "version": "1.15.1", - "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.1.tgz", - "integrity": "sha512-yLAMQs+k0b2m7cVxpS1VKJVvoz7SS9Td1zss3XRwXj+ZDH00RJgnuLx7E44wx02kQLrdM3aOOy+FpzS7+8OizA==", + "version": "1.15.5", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.5.tgz", + "integrity": "sha512-vSFWUON1B+yAw1VN4xMfxgn5fTUiaOzAJCKBwIIgT/+7CuGy9+r+5gITvP62j3RmaD5Ph65UaERdOSRGUzZtgw==", "funding": [ { "type": "individual", From 64b454e2ce3469fd6a222b38c73f7165cae0642f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Spie=C3=9F?= Date: Wed, 31 Jan 2024 15:34:39 +0100 Subject: [PATCH 09/22] Add GitHub Actions workflow to run tests --- .github/workflows/test.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..43cf99f4 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,29 @@ +name: Run tests + +on: + push: + branches: + - master + pull_request: + branches: + - master + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4.1.1 + + - uses: actions/setup-node@v4.0.1 + with: + node-version: 18 + cache: 'npm' + + - name: Install dependencies + run: npm install + + - name: Run tests + run: npm test From 25195d93030ef73156f663742ad83997c59fa8ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Spie=C3=9F?= Date: Wed, 31 Jan 2024 23:06:20 +0100 Subject: [PATCH 10/22] Use `npm ci` to install dependencies in CI --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 43cf99f4..781defbf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,7 +23,7 @@ jobs: cache: 'npm' - name: Install dependencies - run: npm install + run: npm ci - name: Run tests run: npm test From ca3901a816f41eb26d90b571de53e7caaea9455f Mon Sep 17 00:00:00 2001 From: Qing Tomlinson Date: Wed, 10 Jan 2024 21:50:31 -0700 Subject: [PATCH 11/22] Refactor and add unit test --- lib/utils.js | 10 +++++++++- .../process/abstractClearlyDefinedProcessor.js | 4 ++-- providers/process/abstractProcessor.js | 17 +++-------------- test/unit/lib/utilsTests.js | 18 +++++++++++++++++- 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/lib/utils.js b/lib/utils.js index 722b6fae..b3c2b63b 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MIT const { DateTime } = require('luxon') const { spawn } = require('child_process') +const { intersection } = require('lodash') const dateTimeFormats = [ 'EEE MMM d HH:mm:ss \'GMT\'ZZ yyyy' //in pom properties @@ -31,6 +32,13 @@ function trimAllParents(paths, parents) { return paths.map(path => trimParents(path, parents)) } +function isValidExcludingGit(file) { + const exclusions = ['.git'] + if (!file) return false + const segments = file.split(/[\\/]/g) + return !intersection(segments, exclusions).length +} + function extractDate(dateAndTime, formats = dateTimeFormats) { if (!dateAndTime) return dateAndTime let luxonResult = DateTime.fromISO(dateAndTime) @@ -75,4 +83,4 @@ function spawnPromisified(command, args, options) { }) } -module.exports = { normalizePath, normalizePaths, trimParents, trimAllParents, extractDate, spawnPromisified } +module.exports = { normalizePath, normalizePaths, trimParents, trimAllParents, isValidExcludingGit, extractDate, spawnPromisified } diff --git a/providers/process/abstractClearlyDefinedProcessor.js b/providers/process/abstractClearlyDefinedProcessor.js index 6eabc9f1..078d8db2 100644 --- a/providers/process/abstractClearlyDefinedProcessor.js +++ b/providers/process/abstractClearlyDefinedProcessor.js @@ -6,7 +6,7 @@ const throat = require('throat') const path = require('path') const { pick, merge } = require('lodash') const du = require('du') -const { trimParents } = require('../../lib/utils') +const { trimParents, isValidExcludingGit } = require('../../lib/utils') class AbstractClearlyDefinedProcessor extends AbstractProcessor { get toolVersion() { @@ -75,7 +75,7 @@ class AbstractClearlyDefinedProcessor extends AbstractProcessor { let count = 0 const bytes = await du(location, { filter: file => { - if (!this.isValidExcludingGit(file)) return false + if (!isValidExcludingGit(file)) return false count++ return true } diff --git a/providers/process/abstractProcessor.js b/providers/process/abstractProcessor.js index bc4c3c6f..23007ff0 100644 --- a/providers/process/abstractProcessor.js +++ b/providers/process/abstractProcessor.js @@ -6,11 +6,11 @@ const EntitySpec = require('../../lib/entitySpec') const fs = require('fs') const path = require('path') const shajs = require('sha.js') -const { clone, flatten, intersection, pick, set } = require('lodash') +const { clone, flatten, pick, set } = require('lodash') const { promisify } = require('util') const readdir = promisify(fs.readdir) const lstat = promisify(fs.lstat) -const { trimAllParents } = require('../../lib/utils') +const { trimAllParents, isValidExcludingGit } = require('../../lib/utils') class AbstractProcessor extends BaseHandler { constructor(options) { @@ -150,21 +150,10 @@ class AbstractProcessor extends BaseHandler { */ async filterFiles(location) { const fullList = await this.getFiles(location) - const filteredList = fullList.filter(file => this.isValidExcludingGit(file)) + const filteredList = fullList.filter(file => isValidExcludingGit(file)) return trimAllParents(filteredList, location).filter(x => x) } - _isValid(file, exclusions) { - if (!file) return false - const segments = file.split(/[\\/]/g) - return !intersection(segments, exclusions).length - } - - isValidExcludingGit(file) { - const exclusions = ['.git'] - return this._isValid(file, exclusions) - } - shouldFetch() { return true } diff --git a/test/unit/lib/utilsTests.js b/test/unit/lib/utilsTests.js index 82bee191..3ddc9933 100644 --- a/test/unit/lib/utilsTests.js +++ b/test/unit/lib/utilsTests.js @@ -3,7 +3,7 @@ const chai = require('chai') const chaiAsPromised = require('chai-as-promised') -const { normalizePath, normalizePaths, trimParents, trimAllParents, extractDate, spawnPromisified } = require('../../../lib/utils') +const { normalizePath, normalizePaths, trimParents, trimAllParents, extractDate, spawnPromisified, isValidExcludingGit } = require('../../../lib/utils') const { promisify } = require('util') const execFile = promisify(require('child_process').execFile) chai.use(chaiAsPromised) @@ -56,6 +56,22 @@ describe('Utils path functions', () => { }) }) +describe('Util isValidExcludingGit', () => { + it('should exclude .git and its contents', () => { + const data = new Map([ + [null, false], + ['/', true], + ['/tmp/tempX/package/src', true], + ['.git', false], + ['/tmp/tempX/package/.git', false], + ['/tmp/tempX/package/.git/hooks/pre-merge-commit.sample', false] + ]) + data.forEach((expected, input) => { + expect(isValidExcludingGit(input)).to.eq(expected) + }) + }) +}) + describe('Util extractDate', () => { it('handle null', () => { expect(extractDate(null)).to.be.null From d53ed5d232cd9b267a1e5f4108cefc8ac903ec23 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Feb 2024 00:34:11 +0000 Subject: [PATCH 12/22] Bump @babel/traverse from 7.12.9 to 7.23.9 Bumps [@babel/traverse](https://github.com/babel/babel/tree/HEAD/packages/babel-traverse) from 7.12.9 to 7.23.9. - [Release notes](https://github.com/babel/babel/releases) - [Changelog](https://github.com/babel/babel/blob/main/CHANGELOG.md) - [Commits](https://github.com/babel/babel/commits/v7.23.9/packages/babel-traverse) --- updated-dependencies: - dependency-name: "@babel/traverse" dependency-type: indirect ... Signed-off-by: dependabot[bot] --- package-lock.json | 239 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 169 insertions(+), 70 deletions(-) diff --git a/package-lock.json b/package-lock.json index 057dd0b6..da901ca6 100644 --- a/package-lock.json +++ b/package-lock.json @@ -156,43 +156,52 @@ } }, "node_modules/@babel/generator": { - "version": "7.12.5", - "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.12.5.tgz", - "integrity": "sha512-m16TQQJ8hPt7E+OS/XVQg/7U184MLXtvuGbCdA7na61vha+ImkyyNM/9DDA0unYCVZn3ZOhng+qz48/KBOT96A==", + "version": "7.23.6", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.23.6.tgz", + "integrity": "sha512-qrSfCYxYQB5owCmGLbl8XRpX1ytXlpueOb0N0UmQwA073KZxejgQTzAmJezxvpwQD9uGtK2shHdi55QT+MbjIw==", "dev": true, "dependencies": { - "@babel/types": "^7.12.5", - "jsesc": "^2.5.1", - "source-map": "^0.5.0" + "@babel/types": "^7.23.6", + "@jridgewell/gen-mapping": "^0.3.2", + "@jridgewell/trace-mapping": "^0.3.17", + "jsesc": "^2.5.1" + }, + "engines": { + "node": ">=6.9.0" } }, - "node_modules/@babel/generator/node_modules/source-map": { - "version": "0.5.7", - "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.5.7.tgz", - "integrity": "sha1-igOdLRAh0i0eoUyA2OpGi6LvP8w=", + "node_modules/@babel/helper-environment-visitor": { + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-environment-visitor/-/helper-environment-visitor-7.22.20.tgz", + "integrity": "sha512-zfedSIzFhat/gFhWfHtgWvlec0nqB9YEIVrpuwjruLlXfUSnA8cJB0miHKwqDnQ7d32aKo2xt88/xZptwxbfhA==", "dev": true, "engines": { - "node": ">=0.10.0" + "node": ">=6.9.0" } }, "node_modules/@babel/helper-function-name": { - "version": "7.10.4", - "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.10.4.tgz", - "integrity": "sha512-YdaSyz1n8gY44EmN7x44zBn9zQ1Ry2Y+3GTA+3vH6Mizke1Vw0aWDM66FOYEPw8//qKkmqOckrGgTYa+6sceqQ==", + "version": "7.23.0", + "resolved": "https://registry.npmjs.org/@babel/helper-function-name/-/helper-function-name-7.23.0.tgz", + "integrity": "sha512-OErEqsrxjZTJciZ4Oo+eoZqeW9UIiOcuYKRJA4ZAgV9myA+pOXhhmpfNCKjEH/auVfEYVFJ6y1Tc4r0eIApqiw==", "dev": true, "dependencies": { - "@babel/helper-get-function-arity": "^7.10.4", - "@babel/template": "^7.10.4", - "@babel/types": "^7.10.4" + "@babel/template": "^7.22.15", + "@babel/types": "^7.23.0" + }, + "engines": { + "node": ">=6.9.0" } }, - "node_modules/@babel/helper-get-function-arity": { - "version": "7.10.4", - "resolved": "https://registry.npmjs.org/@babel/helper-get-function-arity/-/helper-get-function-arity-7.10.4.tgz", - "integrity": "sha512-EkN3YDB+SRDgiIUnNgcmiD361ti+AVbL3f3Henf6dqqUyr5dMsorno0lJWJuLhDhkI5sYEpgj6y9kB8AOU1I2A==", + "node_modules/@babel/helper-hoist-variables": { + "version": "7.22.5", + "resolved": "https://registry.npmjs.org/@babel/helper-hoist-variables/-/helper-hoist-variables-7.22.5.tgz", + "integrity": "sha512-wGjk9QZVzvknA6yKIUURb8zY3grXCcOZt+/7Wcy8O2uctxhplmUPkOdlgoNhmdVee2c92JXbf1xpMtVNbfoxRw==", "dev": true, "dependencies": { - "@babel/types": "^7.10.4" + "@babel/types": "^7.22.5" + }, + "engines": { + "node": ">=6.9.0" } }, "node_modules/@babel/helper-member-expression-to-functions": { @@ -261,19 +270,34 @@ } }, "node_modules/@babel/helper-split-export-declaration": { - "version": "7.11.0", - "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.11.0.tgz", - "integrity": "sha512-74Vejvp6mHkGE+m+k5vHY93FX2cAtrw1zXrZXRlG4l410Nm9PxfEiVTn1PjDPV5SnmieiueY4AFg2xqhNFuuZg==", + "version": "7.22.6", + "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.22.6.tgz", + "integrity": "sha512-AsUnxuLhRYsisFiaJwvp1QF+I3KjD5FOxut14q/GzovUe6orHLesW2C7d754kRm53h5gqrz6sFl6sxc4BVtE/g==", "dev": true, "dependencies": { - "@babel/types": "^7.11.0" + "@babel/types": "^7.22.5" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/helper-string-parser": { + "version": "7.23.4", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.23.4.tgz", + "integrity": "sha512-803gmbQdqwdf4olxrX4AJyFBV/RTr3rSmOj0rKwesmzlfhYNDEs+/iOcznzpNWlJlIlTJC2QfPFcHB6DlzdVLQ==", + "dev": true, + "engines": { + "node": ">=6.9.0" } }, "node_modules/@babel/helper-validator-identifier": { - "version": "7.10.4", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.10.4.tgz", - "integrity": "sha512-3U9y+43hz7ZM+rzG24Qe2mufW5KhvFg/NhnNph+i9mgCtdTCtMJuI1TMkrIUiK7Ix4PYlRF9I5dhqaLYA/ADXw==", - "dev": true + "version": "7.22.20", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz", + "integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==", + "dev": true, + "engines": { + "node": ">=6.9.0" + } }, "node_modules/@babel/helpers": { "version": "7.12.5", @@ -287,32 +311,23 @@ } }, "node_modules/@babel/highlight": { - "version": "7.16.0", - "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.16.0.tgz", - "integrity": "sha512-t8MH41kUQylBtu2+4IQA3atqevA2lRgqA2wyVB/YiWmsDSuylZZuXOUy9ric30hfzauEFfdsuk/eXTRrGrfd0g==", + "version": "7.23.4", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.23.4.tgz", + "integrity": "sha512-acGdbYSfp2WheJoJm/EBBBLh/ID8KDc64ISZ9DYtBmC8/Q204PZJLHyzeB5qMzJ5trcOkybd78M4x2KWsUq++A==", "dev": true, "dependencies": { - "@babel/helper-validator-identifier": "^7.15.7", - "chalk": "^2.0.0", + "@babel/helper-validator-identifier": "^7.22.20", + "chalk": "^2.4.2", "js-tokens": "^4.0.0" }, "engines": { "node": ">=6.9.0" } }, - "node_modules/@babel/highlight/node_modules/@babel/helper-validator-identifier": { - "version": "7.15.7", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.15.7.tgz", - "integrity": "sha512-K4JvCtQqad9OY2+yTU8w+E82ywk/fe+ELNlt1G8z3bVGlZfn/hOcQQsUhGhW/N+tb3fxK800wLtKOE/aM0m72w==", - "dev": true, - "engines": { - "node": ">=6.9.0" - } - }, "node_modules/@babel/parser": { - "version": "7.12.7", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.12.7.tgz", - "integrity": "sha512-oWR02Ubp4xTLCAqPRiNIuMVgNO5Aif/xpXtabhzW2HWUD47XJsAB4Zd/Rg30+XeQA3juXigV7hlquOTmwqLiwg==", + "version": "7.23.9", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.23.9.tgz", + "integrity": "sha512-9tcKgqKbs3xGJ+NtKF2ndOBBLVwPjl1SHxPQkd36r3Dlirw3xWUeGaTbqr7uGZcTaxkVNwc+03SVP7aCdWrTlA==", "dev": true, "bin": { "parser": "bin/babel-parser.js" @@ -333,31 +348,64 @@ } }, "node_modules/@babel/template": { - "version": "7.12.7", - "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.12.7.tgz", - "integrity": "sha512-GkDzmHS6GV7ZeXfJZ0tLRBhZcMcY0/Lnb+eEbXDBfCAcZCjrZKe6p3J4we/D24O9Y8enxWAg1cWwof59yLh2ow==", + "version": "7.23.9", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.23.9.tgz", + "integrity": "sha512-+xrD2BWLpvHKNmX2QbpdpsBaWnRxahMwJjO+KZk2JOElj5nSmKezyS1B4u+QbHMTX69t4ukm6hh9lsYQ7GHCKA==", "dev": true, "dependencies": { - "@babel/code-frame": "^7.10.4", - "@babel/parser": "^7.12.7", - "@babel/types": "^7.12.7" + "@babel/code-frame": "^7.23.5", + "@babel/parser": "^7.23.9", + "@babel/types": "^7.23.9" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/template/node_modules/@babel/code-frame": { + "version": "7.23.5", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.23.5.tgz", + "integrity": "sha512-CgH3s1a96LipHCmSUmYFPwY7MNx8C3avkq7i4Wl3cfa662ldtUe4VM1TPXX70pfmrlWTb6jLqTYrZyT2ZTJBgA==", + "dev": true, + "dependencies": { + "@babel/highlight": "^7.23.4", + "chalk": "^2.4.2" + }, + "engines": { + "node": ">=6.9.0" } }, "node_modules/@babel/traverse": { - "version": "7.12.9", - "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.12.9.tgz", - "integrity": "sha512-iX9ajqnLdoU1s1nHt36JDI9KG4k+vmI8WgjK5d+aDTwQbL2fUnzedNedssA645Ede3PM2ma1n8Q4h2ohwXgMXw==", + "version": "7.23.9", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.23.9.tgz", + "integrity": "sha512-I/4UJ9vs90OkBtY6iiiTORVMyIhJ4kAVmsKo9KFc8UOxMeUfi2hvtIBsET5u9GizXE6/GFSuKCTNfgCswuEjRg==", "dev": true, "dependencies": { - "@babel/code-frame": "^7.10.4", - "@babel/generator": "^7.12.5", - "@babel/helper-function-name": "^7.10.4", - "@babel/helper-split-export-declaration": "^7.11.0", - "@babel/parser": "^7.12.7", - "@babel/types": "^7.12.7", - "debug": "^4.1.0", - "globals": "^11.1.0", - "lodash": "^4.17.19" + "@babel/code-frame": "^7.23.5", + "@babel/generator": "^7.23.6", + "@babel/helper-environment-visitor": "^7.22.20", + "@babel/helper-function-name": "^7.23.0", + "@babel/helper-hoist-variables": "^7.22.5", + "@babel/helper-split-export-declaration": "^7.22.6", + "@babel/parser": "^7.23.9", + "@babel/types": "^7.23.9", + "debug": "^4.3.1", + "globals": "^11.1.0" + }, + "engines": { + "node": ">=6.9.0" + } + }, + "node_modules/@babel/traverse/node_modules/@babel/code-frame": { + "version": "7.23.5", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.23.5.tgz", + "integrity": "sha512-CgH3s1a96LipHCmSUmYFPwY7MNx8C3avkq7i4Wl3cfa662ldtUe4VM1TPXX70pfmrlWTb6jLqTYrZyT2ZTJBgA==", + "dev": true, + "dependencies": { + "@babel/highlight": "^7.23.4", + "chalk": "^2.4.2" + }, + "engines": { + "node": ">=6.9.0" } }, "node_modules/@babel/traverse/node_modules/debug": { @@ -384,14 +432,17 @@ "dev": true }, "node_modules/@babel/types": { - "version": "7.12.7", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.12.7.tgz", - "integrity": "sha512-MNyI92qZq6jrQkXvtIiykvl4WtoRrVV9MPn+ZfsoEENjiWcBQ3ZSHrkxnJWgWtLX3XXqX5hrSQ+X69wkmesXuQ==", + "version": "7.23.9", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.23.9.tgz", + "integrity": "sha512-dQjSq/7HaSjRM43FFGnv5keM2HsxpmyV1PfaSVm0nzzjwwTmjOe6J4bC8e3+pTEIgHaHj+1ZlLThRJ2auc/w1Q==", "dev": true, "dependencies": { - "@babel/helper-validator-identifier": "^7.10.4", - "lodash": "^4.17.19", + "@babel/helper-string-parser": "^7.23.4", + "@babel/helper-validator-identifier": "^7.22.20", "to-fast-properties": "^2.0.0" + }, + "engines": { + "node": ">=6.9.0" } }, "node_modules/@clearlydefined/spdx": { @@ -573,6 +624,54 @@ "node": ">=8" } }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.3", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.3.tgz", + "integrity": "sha512-HLhSWOLRi875zjjMG/r+Nv0oCW8umGb0BgEhyX3dDX3egwZtB8PqLnjz3yedt8R5StBrzcg4aBpnh8UA9D1BoQ==", + "dev": true, + "dependencies": { + "@jridgewell/set-array": "^1.0.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.9" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz", + "integrity": "sha512-dSYZh7HhCDtCKm4QakX0xFpsRDqjjtZf/kjI/v3T3Nwt5r8/qz/M19F9ySyOqU94SXBmeG9ttTul+YnR4LOxFA==", + "dev": true, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/set-array": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.1.2.tgz", + "integrity": "sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw==", + "dev": true, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.4.15", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", + "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==", + "dev": true + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.22", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.22.tgz", + "integrity": "sha512-Wf963MzWtA2sjrNt+g18IAln9lKnlRp+K2eH4jjIoF1wYeq3aMREpG09xhlhdzS0EjwU7qmUJYangWa+151vZw==", + "dev": true, + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, "node_modules/@microsoft/refreshing-config": { "version": "0.1.3", "resolved": "https://registry.npmjs.org/@microsoft/refreshing-config/-/refreshing-config-0.1.3.tgz", From 09178fa9c309de64f2206e2fd8e3cc05b3268f6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Spie=C3=9F?= Date: Wed, 14 Feb 2024 12:25:01 +0100 Subject: [PATCH 13/22] Delete azure-pipelines.yml --- azure-pipelines.yml | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 azure-pipelines.yml diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index 5e72c90c..00000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,21 +0,0 @@ -# Node.js -# Build a general Node.js project with npm. -# Add steps that analyze code, save build artifacts, deploy, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/javascript - -pool: - vmImage: 'ubuntu-20.04' - -steps: - - task: NodeTool@0 - inputs: - versionSpec: '18.x' - displayName: 'Install Node.js' - - - script: | - npm install - displayName: 'npm install' - - - script: | - npm test - displayName: 'npm test' From bc0e178f998037d1cf0a63a38ae2b6f020f83f5b Mon Sep 17 00:00:00 2001 From: Yash Kohli Date: Tue, 20 Feb 2024 12:41:50 +0530 Subject: [PATCH 14/22] Fix Issue in Dev Env Setup for Mac --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 README.md diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 6ee6dc4c..a6a584e5 --- a/README.md +++ b/README.md @@ -145,7 +145,7 @@ See `local.env.list`, `dev.env.list` and `prod.env.list` tempate files. ## Build and run Docker image locally -`docker build -t cdcrawler:latest .` +`docker build --platform linux/amd64 -t cdcrawler:latest .` `docker run --rm --env-file ../dev.env.list -p 5000:5000 -p 9229:9229 cdcrawler:latest` From 97c40bc22dea60d9d69b556fa9509172a0fc3aa5 Mon Sep 17 00:00:00 2001 From: Yash Kohli Date: Thu, 22 Feb 2024 22:48:50 +0530 Subject: [PATCH 15/22] Fix Issue in Dev Env Setup for Mac --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a6a584e5..8d7a0f5c 100755 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ If a CRAWLER_ID is specified, then each instance must have this setting globally ## Run Docker image from Docker Hub You can run the image as is from docker (this is w/o any port forwarding, which means the only way you can interact with the crawler locally is through the queue. See below for examples of how to run with ports exposed to do curl based testing). -`docker run --env-file ../.env.list clearlydefined/crawler` +`docker run --platform linux/amd64 --env-file ../.env.list clearlydefined/crawler` See `local.env.list`, `dev.env.list` and `prod.env.list` tempate files. @@ -147,11 +147,11 @@ See `local.env.list`, `dev.env.list` and `prod.env.list` tempate files. `docker build --platform linux/amd64 -t cdcrawler:latest .` -`docker run --rm --env-file ../dev.env.list -p 5000:5000 -p 9229:9229 cdcrawler:latest` +`docker run --platform linux/amd64 --rm --env-file ../dev.env.list -p 5000:5000 -p 9229:9229 cdcrawler:latest` With a debugger: -`docker run --rm -d --env-file ../dev.env.list -p 9229:9229 -p 5000:5000 --entrypoint node cdcrawler:latest --inspect-brk=0.0.0.0:9229 index.js` +`docker run --platform linux/amd64 --rm -d --env-file ../dev.env.list -p 9229:9229 -p 5000:5000 --entrypoint node cdcrawler:latest --inspect-brk=0.0.0.0:9229 index.js` At this point you can attach VS Code with the built in debugging profile (see .vscode/launch.json) From 740967cd9f460e8fe4d1c6dd8f9ad4b7f28e5f6d Mon Sep 17 00:00:00 2001 From: Qing Tomlinson Date: Fri, 15 Mar 2024 14:30:39 -0700 Subject: [PATCH 16/22] Address review comments --- lib/utils.js | 7 ++--- .../abstractClearlyDefinedProcessor.js | 4 +-- providers/process/abstractProcessor.js | 4 +-- test/unit/lib/utilsTests.js | 28 +++++++++---------- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/lib/utils.js b/lib/utils.js index b3c2b63b..df2e395a 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -32,11 +32,10 @@ function trimAllParents(paths, parents) { return paths.map(path => trimParents(path, parents)) } -function isValidExcludingGit(file) { - const exclusions = ['.git'] +function isGitFile(file) { if (!file) return false const segments = file.split(/[\\/]/g) - return !intersection(segments, exclusions).length + return intersection(segments, ['.git']).length > 0 } function extractDate(dateAndTime, formats = dateTimeFormats) { @@ -83,4 +82,4 @@ function spawnPromisified(command, args, options) { }) } -module.exports = { normalizePath, normalizePaths, trimParents, trimAllParents, isValidExcludingGit, extractDate, spawnPromisified } +module.exports = { normalizePath, normalizePaths, trimParents, trimAllParents, isGitFile, extractDate, spawnPromisified } diff --git a/providers/process/abstractClearlyDefinedProcessor.js b/providers/process/abstractClearlyDefinedProcessor.js index 078d8db2..506d860c 100644 --- a/providers/process/abstractClearlyDefinedProcessor.js +++ b/providers/process/abstractClearlyDefinedProcessor.js @@ -6,7 +6,7 @@ const throat = require('throat') const path = require('path') const { pick, merge } = require('lodash') const du = require('du') -const { trimParents, isValidExcludingGit } = require('../../lib/utils') +const { trimParents, isGitFile } = require('../../lib/utils') class AbstractClearlyDefinedProcessor extends AbstractProcessor { get toolVersion() { @@ -75,7 +75,7 @@ class AbstractClearlyDefinedProcessor extends AbstractProcessor { let count = 0 const bytes = await du(location, { filter: file => { - if (!isValidExcludingGit(file)) return false + if (isGitFile(file)) return false count++ return true } diff --git a/providers/process/abstractProcessor.js b/providers/process/abstractProcessor.js index 23007ff0..4ccb5d32 100644 --- a/providers/process/abstractProcessor.js +++ b/providers/process/abstractProcessor.js @@ -10,7 +10,7 @@ const { clone, flatten, pick, set } = require('lodash') const { promisify } = require('util') const readdir = promisify(fs.readdir) const lstat = promisify(fs.lstat) -const { trimAllParents, isValidExcludingGit } = require('../../lib/utils') +const { trimAllParents, isGitFile } = require('../../lib/utils') class AbstractProcessor extends BaseHandler { constructor(options) { @@ -150,7 +150,7 @@ class AbstractProcessor extends BaseHandler { */ async filterFiles(location) { const fullList = await this.getFiles(location) - const filteredList = fullList.filter(file => isValidExcludingGit(file)) + const filteredList = fullList.filter(file => file && !isGitFile(file)) return trimAllParents(filteredList, location).filter(x => x) } diff --git a/test/unit/lib/utilsTests.js b/test/unit/lib/utilsTests.js index 3ddc9933..b3cf5028 100644 --- a/test/unit/lib/utilsTests.js +++ b/test/unit/lib/utilsTests.js @@ -3,7 +3,7 @@ const chai = require('chai') const chaiAsPromised = require('chai-as-promised') -const { normalizePath, normalizePaths, trimParents, trimAllParents, extractDate, spawnPromisified, isValidExcludingGit } = require('../../../lib/utils') +const { normalizePath, normalizePaths, trimParents, trimAllParents, extractDate, spawnPromisified, isGitFile } = require('../../../lib/utils') const { promisify } = require('util') const execFile = promisify(require('child_process').execFile) chai.use(chaiAsPromised) @@ -56,20 +56,20 @@ describe('Utils path functions', () => { }) }) -describe('Util isValidExcludingGit', () => { - it('should exclude .git and its contents', () => { - const data = new Map([ - [null, false], - ['/', true], - ['/tmp/tempX/package/src', true], - ['.git', false], - ['/tmp/tempX/package/.git', false], - ['/tmp/tempX/package/.git/hooks/pre-merge-commit.sample', false] - ]) - data.forEach((expected, input) => { - expect(isValidExcludingGit(input)).to.eq(expected) - }) +describe('Util isGitFile', () => { + const entries = new Map([ + [null, false], + ['/', false], + ['/tmp/tempX/package/src', false], + ['.git', true], + ['/tmp/tempX/package/.git', true], + ['/tmp/tempX/package/.git/hooks/pre-merge-commit.sample', true] + ]) + + entries.forEach((expected, file) => { + it(`should return ${expected} for isGitFile given '${file}'`, () => expect(isGitFile(file)).to.eq(expected)) }) + }) describe('Util extractDate', () => { From dfb66a267655e909baccadec2580aa951cb29f9a Mon Sep 17 00:00:00 2001 From: yashkohli88 Date: Tue, 9 Apr 2024 11:07:20 +0530 Subject: [PATCH 17/22] Updated URL to fetch latest GO packages --- providers/fetch/goFetch.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/providers/fetch/goFetch.js b/providers/fetch/goFetch.js index fd9e9fb2..5b9464cb 100644 --- a/providers/fetch/goFetch.js +++ b/providers/fetch/goFetch.js @@ -69,7 +69,8 @@ class GoFetch extends AbstractFetch { } async _getLatestVersion(spec) { - const initial_url = `https://${spec.provider}/${spec.namespace}/${spec.name}/@v/list` + //const initial_url = `https://${spec.provider}/${spec.namespace}/${spec.name}/@v/list` + const initial_url = `https://proxy.golang.org/${spec.namespace}/${spec.name}/@v/list` const replace_encoded_url = this._replace_encodings(initial_url) const url = replace_encoded_url.replace(/null\//g, '') From 58fef9c18044fae135e6d3cfbefff12ffef9bffc Mon Sep 17 00:00:00 2001 From: yashkohli88 Date: Tue, 9 Apr 2024 11:08:25 +0530 Subject: [PATCH 18/22] Updated URL to fetch latest GO packages --- providers/fetch/goFetch.js | 1 - 1 file changed, 1 deletion(-) diff --git a/providers/fetch/goFetch.js b/providers/fetch/goFetch.js index 5b9464cb..371fbd4d 100644 --- a/providers/fetch/goFetch.js +++ b/providers/fetch/goFetch.js @@ -69,7 +69,6 @@ class GoFetch extends AbstractFetch { } async _getLatestVersion(spec) { - //const initial_url = `https://${spec.provider}/${spec.namespace}/${spec.name}/@v/list` const initial_url = `https://proxy.golang.org/${spec.namespace}/${spec.name}/@v/list` const replace_encoded_url = this._replace_encodings(initial_url) const url = replace_encoded_url.replace(/null\//g, '') From e350f0e83e0d41009ed9b08f0ccef215f5c0055b Mon Sep 17 00:00:00 2001 From: yashkohli88 Date: Tue, 16 Apr 2024 10:41:32 +0530 Subject: [PATCH 19/22] Updated test to validate GO package download URL --- test/unit/providers/fetch/goFetchTests.js | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/providers/fetch/goFetchTests.js b/test/unit/providers/fetch/goFetchTests.js index b075fec1..fbc4fe8b 100644 --- a/test/unit/providers/fetch/goFetchTests.js +++ b/test/unit/providers/fetch/goFetchTests.js @@ -47,6 +47,7 @@ describe('Go Proxy fetching', () => { beforeEach(() => { const requestPromiseStub = options => { if (options.url) { + expect(options.url).to.contain(stub) if (options.url.includes('error')) throw new Error('yikes') if (options.url.includes('code')) throw { statusCode: 500, message: 'Code' } if (options.url.includes('missing')) throw { statusCode: 404 } From 1c6e85508bd1199d5317ff44ef5f01711a870961 Mon Sep 17 00:00:00 2001 From: yashkohli88 Date: Wed, 17 Apr 2024 16:18:18 +0530 Subject: [PATCH 20/22] Updated URL as variable for go packages --- providers/fetch/goFetch.js | 8 ++++++-- test/unit/providers/fetch/goFetchTests.js | 14 +++++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/providers/fetch/goFetch.js b/providers/fetch/goFetch.js index 371fbd4d..d8790100 100644 --- a/providers/fetch/goFetch.js +++ b/providers/fetch/goFetch.js @@ -9,6 +9,10 @@ const { parse: htmlParser } = require('node-html-parser') const { parse: spdxParser } = require('@clearlydefined/spdx') const FetchResult = require('../../lib/fetchResult') +const providerMap = { + golang: 'https://proxy.golang.org' +} + class GoFetch extends AbstractFetch { constructor(options) { super(options) @@ -69,7 +73,7 @@ class GoFetch extends AbstractFetch { } async _getLatestVersion(spec) { - const initial_url = `https://proxy.golang.org/${spec.namespace}/${spec.name}/@v/list` + const initial_url = `${providerMap.golang}/${spec.namespace}/${spec.name}/@v/list` const replace_encoded_url = this._replace_encodings(initial_url) const url = replace_encoded_url.replace(/null\//g, '') @@ -89,7 +93,7 @@ class GoFetch extends AbstractFetch { } _buildUrl(spec, extension = '.zip') { - let initial_url = `https://proxy.golang.org/${spec.namespace}/${spec.name}/@v/${spec.revision}${extension}` + let initial_url = `${providerMap.golang}/${spec.namespace}/${spec.name}/@v/${spec.revision}${extension}` return this._replace_encodings(this._remove_blank_fields(initial_url)) } diff --git a/test/unit/providers/fetch/goFetchTests.js b/test/unit/providers/fetch/goFetchTests.js index fbc4fe8b..237d9819 100644 --- a/test/unit/providers/fetch/goFetchTests.js +++ b/test/unit/providers/fetch/goFetchTests.js @@ -9,16 +9,16 @@ const Request = require('../../../../ghcrawler').request const fs = require('fs') const { merge } = require('lodash') -const stub = 'https://proxy.golang.org/' +const goBaseURL = 'https://proxy.golang.org/' describe('Go utility functions', () => { it('builds URLs', () => { const fetch = GoFetch({}) - expect(fetch._buildUrl(spec('go', 'golang', 'cloud.google.com', 'go', 'v0.56.0'))).to.equal(stub + 'cloud.google.com/go/@v/v0.56.0.zip') - expect(fetch._buildUrl(spec('go', 'golang', 'cloud.google.com', 'go', 'v0.56.0'), '.mod')).to.equal(stub + 'cloud.google.com/go/@v/v0.56.0.mod') - expect(fetch._buildUrl(spec('go', 'golang', '-', 'collectd.org', 'v0.5.0'))).to.equal(stub + 'collectd.org/@v/v0.5.0.zip') - expect(fetch._buildUrl(spec('go', 'golang', 'github.com%2fAzure%2fazure-event-hubs-go', 'v3', 'v3.2.0'))).to.equal(stub + 'github.com/Azure/azure-event-hubs-go/v3/@v/v3.2.0.zip') - expect(fetch._buildUrl(spec('go', 'golang', 'github.com%2FAzure%2Fazure-event-hubs-go', 'v3', 'v3.2.0'))).to.equal(stub + 'github.com/Azure/azure-event-hubs-go/v3/@v/v3.2.0.zip') + expect(fetch._buildUrl(spec('go', 'golang', 'cloud.google.com', 'go', 'v0.56.0'))).to.equal(goBaseURL + 'cloud.google.com/go/@v/v0.56.0.zip') + expect(fetch._buildUrl(spec('go', 'golang', 'cloud.google.com', 'go', 'v0.56.0'), '.mod')).to.equal(goBaseURL + 'cloud.google.com/go/@v/v0.56.0.mod') + expect(fetch._buildUrl(spec('go', 'golang', '-', 'collectd.org', 'v0.5.0'))).to.equal(goBaseURL + 'collectd.org/@v/v0.5.0.zip') + expect(fetch._buildUrl(spec('go', 'golang', 'github.com%2fAzure%2fazure-event-hubs-go', 'v3', 'v3.2.0'))).to.equal(goBaseURL + 'github.com/Azure/azure-event-hubs-go/v3/@v/v3.2.0.zip') + expect(fetch._buildUrl(spec('go', 'golang', 'github.com%2FAzure%2Fazure-event-hubs-go', 'v3', 'v3.2.0'))).to.equal(goBaseURL + 'github.com/Azure/azure-event-hubs-go/v3/@v/v3.2.0.zip') }) }) @@ -47,7 +47,7 @@ describe('Go Proxy fetching', () => { beforeEach(() => { const requestPromiseStub = options => { if (options.url) { - expect(options.url).to.contain(stub) + expect(options.url).to.contain(goBaseURL) if (options.url.includes('error')) throw new Error('yikes') if (options.url.includes('code')) throw { statusCode: 500, message: 'Code' } if (options.url.includes('missing')) throw { statusCode: 404 } From e203f8b03d773ed48afe60ac267876eec63d427e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lukas=20Spie=C3=9F?= Date: Wed, 17 Apr 2024 17:22:12 +0200 Subject: [PATCH 21/22] Rename providerDictionary to providerMap in gitCloner.js --- providers/fetch/gitCloner.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/providers/fetch/gitCloner.js b/providers/fetch/gitCloner.js index 00b80615..c28f9ff1 100644 --- a/providers/fetch/gitCloner.js +++ b/providers/fetch/gitCloner.js @@ -7,7 +7,7 @@ const { clone } = require('lodash') const rimraf = require('rimraf') const FetchResult = require('../../lib/fetchResult') -const providerDictionary = { +const providerMap = { gitlab: 'https://gitlab.com', github: 'https://github.com' } @@ -95,7 +95,7 @@ class GitCloner extends AbstractFetch { _buildUrl(spec) { const fullName = `${spec.namespace.replace(/\./g, '/')}/${spec.name}` - return `${providerDictionary[spec.provider]}/${fullName}.git` + return `${providerMap[spec.provider]}/${fullName}.git` } } From 63526a8943e9fd043633138ae6de254e15e2a48c Mon Sep 17 00:00:00 2001 From: "E. Lynette Rayle" Date: Mon, 29 Apr 2024 19:37:38 -0400 Subject: [PATCH 22/22] 1.0.2 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 594d8adc..483177c9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "clearlydefined-crawler", - "version": "0.1.1", + "version": "1.0.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "clearlydefined-crawler", - "version": "0.1.1", + "version": "1.0.2", "hasInstallScript": true, "license": "MIT", "dependencies": { diff --git a/package.json b/package.json index 8a07af1e..63c929bb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "clearlydefined-crawler", - "version": "0.1.1", + "version": "1.0.2", "description": "A crawler that walks projects and packages looking for data of interest to the ClearlyDefined project.", "main": "./index.js", "scripts": {