From 6e7c714b4a9f0391c819548512ed7f868a788b80 Mon Sep 17 00:00:00 2001 From: Breck Yunits Date: Mon, 20 May 2024 07:02:17 -0400 Subject: [PATCH] Refactor CLI and add command line search --- ScrollSet.js | 108 +++++++++++++++++++++++++++++++++++++ cli.js | 149 ++++++++------------------------------------------- 2 files changed, 130 insertions(+), 127 deletions(-) create mode 100644 ScrollSet.js diff --git a/ScrollSet.js b/ScrollSet.js new file mode 100644 index 000000000000..22abce92ed70 --- /dev/null +++ b/ScrollSet.js @@ -0,0 +1,108 @@ +const path = require("path") +const lodash = require("lodash") + +const { TreeNode } = require("jtree/products/TreeNode.js") +const { Utils } = require("jtree/products/Utils.js") +const { Disk } = require("jtree/products/Disk.node.js") + +class ScrollSetCLI { + constructor() { + this.quickCache = {} + } + + importCommand(filename) { + // todo: add support for updating as well + const processEntry = (node, index) => { + const filename = node.get("filename") + node.delete("filename") + const target = path.join(__dirname, "concepts", filename) + Disk.write(target, new TreeNode(Disk.read(target)).patch(node).toString()) + console.log(`Processed ${filename}`) + } + + const extension = filename.split(".").pop() + + if (extension === "csv") TreeNode.fromCsv(Disk.read(filename)).forEach(processEntry) + + if (extension === "tsv") TreeNode.fromTsv(Disk.read(filename)).forEach(processEntry) + + if (extension === "tree") TreeNode.fromDisk(filename).forEach(processEntry) + } + + get searchIndex() { + if (!this.quickCache.searchIndex) this.quickCache.searchIndex = this.makeNameSearchIndex() + return this.quickCache.searchIndex + } + + makeFilePath(id) { + return path.join(this.conceptsFolder, id.replace(".scroll", "") + ".scroll") + } + + getTree(file) { + return new TreeNode(Disk.read(this.makeFilePath(file.filename))) + } + + setAndSave(file, measurementPath, measurementValue) { + const tree = this.getTree(file) + tree.set(measurementPath, measurementValue) + return this.save(file, tree) + } + + save(file, tree) { + const dest = this.makeFilePath(file.filename) + return Disk.write(dest, tree.toString()) + } + + makeNameSearchIndex(files = this.concepts.slice(0).reverse()) { + const map = new Map() + files.forEach(parsedConcept => { + const id = parsedConcept.filename.replace(".scroll", "") + this.makeNames(parsedConcept).forEach(name => map.set(name.toLowerCase(), parsedConcept)) + }) + return map + } + + makeNames(concept) { + return [concept.filename.replace(".scroll", ""), concept.id].filter(i => i) + } + + searchForConcept(query) { + if (query === undefined || query === "") return + const { searchIndex } = this + return ( + searchIndex.get(query) || searchIndex.get(query.toLowerCase()) || searchIndex.get(Utils.titleToPermalink(query)) + ) + } + + searchForConceptCommand(query) { + console.log(lodash.pickBy(this.searchForConcept(query), lodash.identity)) + } + + grammarFile = "" + scrollSetName = "myScrollSet" + + get concepts() { + return require(this.compiledConcepts) + } + + buildGrammarFileCommand() { + const code = `node_modules/scroll-cli/grammar/cellTypes.grammar +node_modules/scroll-cli/grammar/root.grammar +node_modules/scroll-cli/grammar/comments.grammar +node_modules/scroll-cli/grammar/blankLine.grammar +node_modules/scroll-cli/grammar/measures.grammar +node_modules/scroll-cli/grammar/import.grammar +node_modules/scroll-cli/grammar/errors.grammar +${this.grammarFile}` + .trim() + .split("\n") + .map(filepath => Disk.read(path.join(__dirname, filepath))) 
+    .join("\n\n")
+    .replace("catchAllParser catchAllParagraphParser", "catchAllParser errorParser")
+    .replace(/^importOnly\n/gm, "")
+    .replace(/^import .+/gm, "")
+    Disk.write(path.join(__dirname, `${this.scrollSetName}.grammar`), code)
+  }
+}
+
+module.exports = { ScrollSetCLI }
diff --git a/cli.js b/cli.js
index 7e33ae0427c9..279183604ce2 100755
--- a/cli.js
+++ b/cli.js
@@ -1,31 +1,37 @@
 #! /usr/bin/env node
 
 const path = require("path")
-const numeral = require("numeral")
-const lodash = require("lodash")
-const dayjs = require("dayjs")
 
 const { TreeNode } = require("jtree/products/TreeNode.js")
 const { Utils } = require("jtree/products/Utils.js")
-const { shiftRight, removeReturnChars } = Utils
 const { Disk } = require("jtree/products/Disk.node.js")
+const { ScrollSetCLI } = require("./ScrollSet.js")
 
 const baseFolder = path.join(__dirname)
 const ignoreFolder = path.join(baseFolder, "ignore")
-const pagesDir = path.join(baseFolder, "pages")
-const listsFolder = path.join(baseFolder, "lists")
-const conceptsFolder = path.join(baseFolder, "concepts")
-
-class PLDBCli {
-  constructor() {
-    this.quickCache = {}
-  }
+class PLDBCli extends ScrollSetCLI {
   get keywordsOneHotCsv() {
     if (!this.quickCache.keywordsOneHotCsv) this.quickCache.keywordsOneHotCsv = new TreeNode(this.keywordsOneHot).asCsv
     return this.quickCache.keywordsOneHotCsv
   }
 
+  conceptsFolder = path.join(baseFolder, "concepts")
+  grammarFile = "code/measures.scroll"
+  scrollSetName = "pldb"
+  compiledConcepts = "./pldb.json"
+
+  makeNames(concept) {
+    return [
+      concept.filename.replace(".scroll", ""),
+      concept.id,
+      concept.standsFor,
+      concept.githubLanguage,
+      concept.wikipediaTitle,
+      concept.aka
+    ].filter(i => i)
+  }
+
   get keywordsOneHot() {
     if (this.quickCache.keywordsOneHot) return this.quickCache.keywordsOneHot
     const { keywordsTable } = this
@@ -46,33 +52,11 @@ class PLDBCli {
     return rows
   }
 
-  // addRedirects(app) {
-  //   // /languages => /truebase redirect
-  //   app.get("/languages/:id", (req, res, next) => res.status(302).redirect(`/concepts/${req.params.id}`))
-
-  //   const redirects = Disk.read(path.join(siteFolder, "redirects.txt"))
-  //     .split("\n")
-  //     .map(line => {
-  //       const [oldUrl, newUrl] = line.split(" ")
-  //       return {
-  //         oldUrl,
-  //         newUrl
-  //       }
-  //     })
-  //   redirects.forEach(redirect =>
-  //     app.get(`/${redirect.oldUrl}`, (req, res) => res.status(301).redirect(redirect.newUrl))
-  //   )
-  // }
-
-  get pldb() {
-    return require("./pldb.json")
-  }
-
   async crawlGitHubCommand() {
     // Todo: figure out the best repo organization for crawlers.
     // Note: this currently assumes you have the measurementscrawlers project installed separately.
     const { GitHubImporter } = require("../measurementscrawlers/github.com/GitHub.js")
-    const importer = new GitHubImporter(this.pldb, conceptsFolder)
+    const importer = new GitHubImporter(this.concepts, this.conceptsFolder)
     await importer.fetchAllRepoDataCommand()
     await importer.writeAllRepoDataCommand()
   }
@@ -82,7 +66,7 @@ class PLDBCli {
     // Note: this currently assumes you have the measurementscrawlers project installed separately.
     const { RedditImporter } = require("../measurementscrawlers/reddit.com/Reddit.js")
-    const importer = new RedditImporter(this.pldb, conceptsFolder)
+    const importer = new RedditImporter(this.concepts, this.conceptsFolder)
     await importer.createFromAnnouncementsCommand()
   }
 
@@ -91,7 +75,7 @@ class PLDBCli {
     // Todo: figure out the best repo organization for crawlers.
     // Note: this currently assumes you have the measurementscrawlers project installed separately.
     const gitsFolder = path.join(ignoreFolder, "node_modules", "gits") // Toss in a fake "node_modules" folder to avoid a "scroll list" scan. Hacky, I know.
-    this.pldb.forEach(async file => {
+    this.concepts.forEach(async file => {
       const { mainRepo } = file
       if (!mainRepo) return
       const targetFolder = path.join(gitsFolder, file.filename.replace(".scroll", ""))
@@ -102,7 +86,7 @@ class PLDBCli {
       const gitStats = new GitStats(mainRepo, targetFolder)
       if (!Disk.exists(targetFolder)) gitStats.clone()
 
-      const targetPath = path.join(conceptsFolder, file.filename)
+      const targetPath = path.join(this.conceptsFolder, file.filename)
       const tree = new TreeNode(Disk.read(targetPath))
       tree.touchNode("repoStats").setProperties(gitStats.summary)
       if (!tree.has("appeared")) tree.set("appeared", gitStats.firstCommit.toString())
@@ -113,58 +97,6 @@ class PLDBCli {
     })
   }
 
-  get searchIndex() {
-    if (!this.quickCache.searchIndex) this.quickCache.searchIndex = this.makeNameSearchIndex()
-    return this.quickCache.searchIndex
-  }
-
-  makeFilePath(id) {
-    return path.join(conceptsFolder, id.replace(".scroll", "") + ".scroll")
-  }
-
-  getTree(file) {
-    return new TreeNode(Disk.read(this.makeFilePath(file.filename)))
-  }
-
-  setAndSave(file, measurementPath, measurementValue) {
-    const tree = this.getTree(file)
-    tree.set(measurementPath, measurementValue)
-    return this.save(file, tree)
-  }
-
-  save(file, tree) {
-    const dest = this.makeFilePath(file.filename)
-    return Disk.write(dest, tree.toString())
-  }
-
-  makeNameSearchIndex(files = this.pldb.slice(0).reverse()) {
-    const map = new Map()
-    files.forEach(parsedConcept => {
-      const id = parsedConcept.filename.replace(".scroll", "")
-      this.makeNames(parsedConcept).forEach(name => map.set(name.toLowerCase(), parsedConcept))
-    })
-    return map
-  }
-
-  makeNames(concept) {
-    return [
-      concept.filename.replace(".scroll", ""),
-      concept.id,
-      concept.standsFor,
-      concept.githubLanguage,
-      concept.wikipediaTitle,
-      concept.aka
-    ].filter(i => i)
-  }
-
-  searchForConcept(query) {
-    if (query === undefined || query === "") return
-    const { searchIndex } = this
-    return (
-      searchIndex.get(query) || searchIndex.get(query.toLowerCase()) || searchIndex.get(Utils.titleToPermalink(query))
-    )
-  }
-
   searchForConceptByFileExtensions(extensions = []) {
     const { extensionsMap } = this
     const hit = extensions.find(ext => extensionsMap.has(ext))
@@ -179,43 +111,6 @@ class PLDBCli {
 
     return extensionsMap
   }
-
-  buildGrammarFileCommand() {
-    const code = `node_modules/scroll-cli/grammar/cellTypes.grammar
-node_modules/scroll-cli/grammar/root.grammar
-node_modules/scroll-cli/grammar/comments.grammar
-node_modules/scroll-cli/grammar/blankLine.grammar
-node_modules/scroll-cli/grammar/measures.grammar
-node_modules/scroll-cli/grammar/import.grammar
-node_modules/scroll-cli/grammar/errors.grammar
-code/measures.scroll`
-      .split("\n")
-      .map(filepath => Disk.read(path.join(__dirname, filepath)))
-      .join("\n\n")
-      .replace("catchAllParser catchAllParagraphParser", "catchAllParser errorParser")
-      .replace(/^importOnly\n/gm, "")
-      .replace(/^import .+/gm, "")
-    Disk.write(path.join(__dirname, "pldb.grammar"), code)
-  }
-
-  importCommand(filename) {
-    // todo: add support for updating as well
-    const processEntry = (node, index) => {
-      const filename = node.get("filename")
-      node.delete("filename")
-      const target = path.join(__dirname, "concepts", filename)
-      Disk.write(target, new TreeNode(Disk.read(target)).patch(node).toString())
-      console.log(`Processed ${filename}`)
-    }
-
-    const extension = filename.split(".").pop()
-
-    if (extension === "csv") TreeNode.fromCsv(Disk.read(filename)).forEach(processEntry)
-
-    if (extension === "tsv") TreeNode.fromTsv(Disk.read(filename)).forEach(processEntry)
-
-    if (extension === "tree") TreeNode.fromDisk(filename).forEach(processEntry)
-  }
 }
 
 module.exports = { PLDBCli }
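
With this refactor, the generic plumbing (name search index, concept file I/O, tabular imports, grammar building) lives in ScrollSetCLI, and a scroll set only supplies its dataset-specific fields, as PLDBCli does above. A minimal sketch of a second scroll set reusing the base class — the "books" name, paths, and query below are illustrative assumptions, not part of this commit:

const path = require("path")
const { ScrollSetCLI } = require("./ScrollSet.js")

// Hypothetical scroll set reusing the extracted base class.
class BooksCli extends ScrollSetCLI {
  conceptsFolder = path.join(__dirname, "concepts") // one .scroll file per concept
  grammarFile = "code/measures.scroll" // dataset-specific grammar, appended after the shared scroll grammars
  scrollSetName = "books" // buildGrammarFileCommand() writes books.grammar
  compiledConcepts = "./books.json" // compiled concept array consumed by the `concepts` getter
}

// The new command-line search: looks the query up in the lowercase name index
// built by makeNameSearchIndex(), falling back to Utils.titleToPermalink(query),
// and prints the matching concept's truthy fields.
new BooksCli().searchForConceptCommand("Lord of the Rings")

Subclasses that want richer search override makeNames(), as PLDBCli does above to index standsFor, githubLanguage, wikipediaTitle, and aka alongside the default filename and id.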
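The importCommand that moved into ScrollSet.js bulk-patches concept files from tabular data: each row names its target via a filename column, and the remaining columns are merged into concepts/<filename>. A sketch of a hypothetical update file and invocation — the ignore/updates.csv path and the appeared measure are examples, not from this commit:

// ignore/updates.csv (hypothetical):
//   filename,appeared
//   python.scroll,1991
// The filename column picks the target concept file and is dropped;
// everything else is patched into the existing tree and written back.
const { PLDBCli } = require("./cli.js")
new PLDBCli().importCommand("ignore/updates.csv")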