add spell-checking to CI

abdulrahman305 · Oct 26, 2023 · 41e43df · 41e43df
1 parent 37a78e9
commit 41e43df
Show file tree

Hide file tree

Showing 112 changed files with 1,311 additions and 259 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -31,8 +31,8 @@
 		"*.jsx": "${capture}.js",
 		"*.tsx": "${capture}.ts, ${capture}.hooks.ts, ${capture}.hooks.tsx, ${capture}.stories.tsx, ${capture}.story.tsx, ${capture}.spec.tsx, ${capture}.base.ts, ${capture}.base.tsx, ${capture}.types.ts, ${capture}.styles.ts, ${capture}.styles.tsx, ${capture}.utils.ts, ${capture}.utils.tsx, ${capture}.constants.ts, ${capture}.module.scss, ${capture}.module.css, ${capture}.md, ${capture}.css",
 		"tsconfig.json": "tsconfig.*.json",
-		"package.json": "turbo.json, tsconfig.json, rome.json, .npmignore",
-		"README.md": "SECURITY.md, SUPPORT.md, CODE_OF_CONDUCT.md, LICENSE",
+		"package.json": "package-lock.json, turbo.json, tsconfig.json, rome.json, .npmignore, dictionary.txt, cspell.config.yaml",
+		"README.md": "SECURITY.md, SUPPORT.md, CODE_OF_CONDUCT.md, LICENSE, CODEOWNERS",
 		".eslintrc": ".eslintignore",
 		".prettierrc": ".prettierignore",
 		".gitattributes": ".gitignore",
@@ -52,5 +52,15 @@
 			"source.organizeImports": true
 		}
 	},
-	"autoDocstring.docstringFormat": "sphinx"
+	"autoDocstring.docstringFormat": "sphinx",
+	"cSpell.customDictionaries": {
+		"project-words": {
+			"name": "project-words",
+			"path": "${workspaceRoot}/dictionary.txt",
+			"description": "Words used in this project",
+			"addWords": true
+		},
+		"custom": true,
+		"internal-terms": true
+	}
 }
diff --git a/README.md b/README.md
@@ -7,13 +7,13 @@ There are four goals of the project:
 1.  Create a shareable client/server schema for serialized wrangling instructions. This is in the ./schema folder. TypeScript types and JSONSchema generation are in javascript/schema, and published schemas are copied out to ./schema along with test cases that are executed by JavaScript and Python builds to ensure parity.
 2.  Maintain an implementation of a basic client-side wrangling engine (largely based on [Arquero](https://github.com/uwdata/arquero)). This is in the ./javascript folder.
 3.  Maintain a Python implementation using common wrangling libraries (e.g., [pandas](https://pandas.pydata.org/)) for backend or data science deployments. This is in the ./python folder.
-4.  Provide some reusable React components so wrangling operations can be incorporated into webapps easily. This is in the ./javascript/react folder.
+4.  Provide some reusable React components so wrangling operations can be incorporated into web applications easily. This is in the ./javascript/react folder.
 
 Individual documentation for the JavaScript and Python implementations can be found in their respective folders. Broad documentation about building pipelines and the available verbs is available in the [docs](docs) folder.
 
 We currently have six primary JavaScript packages:
 
-- [react](javascript/react/docs/markdown/index.md) - this is a set of React components for each verb that you can include in web apps that enable tranformation pipeline building.
+- [react](javascript/react/docs/markdown/index.md) - this is a set of React components for each verb that you can include in web apps that enable transformation pipeline building.
 - [schema](javascript/schema/docs/markdown/index.md) - this is a set of core types and associated JSONSchema definitions for formalizing our data package and resource models (including the definitions for table parsing, Codebooks, and Workflows).
 - [tables](javascript/tables/docs/markdown/index.md) - this is the primary set of utilities for loading and parsing data tables, using Arquero under the hood.
 - [utilities](javascript/utilities/docs/markdown/index.md) - this is a set of helpers for working with files, etc., to ease building data wrangling applications.

diff --git a/cspell.config.yaml b/cspell.config.yaml
@@ -0,0 +1,26 @@
+$schema: https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json
+version: '0.2'
+allowCompoundWords: true
+dictionaryDefinitions:
+  - name: dictionary
+    path: './dictionary.txt'
+    addWords: true
+dictionaries:
+  - dictionary
+ignorePaths:
+  - 'node_modules'
+  - 'storybook-static'
+  - 'output'
+  - 'dist'
+  - 'build'
+  - 'javascript/*/docs'
+  - './javascript/webapp/public/schema'
+  - './schema'
+  - .turbo
+  - '*.csv'
+  - '*.parquet'
+  - '*.arrow'
+  - smoking.json
+  - __pycache__
+  - pyproject.toml
+  - '*.ipynb'
diff --git a/dictionary.txt b/dictionary.txt
@@ -0,0 +1,94 @@
+# Python Idioms
+PYTHONPATH
+pycache
+nopython
+virtualenv
+pyproject
+ipynb
+pymodule
+
+# JavaScript Idioms
+QNAN
+href
+hrefs
+noscript
+
+# Libraries
+Arquero
+pandarallel
+numpy
+linspace
+immer
+ahooks
+fluentui
+
+# Library Methods/Args
+Expando
+dtype
+mkdirp
+iloc
+virtualenvs
+iterrows
+dropna
+astype
+aggfunc
+fillna
+isna
+arange
+bindvar
+atable
+strptime
+isin
+
+# Technical Terms / Studies
+Freedman-Diaconis
+Doane
+Sturges
+NHEFS
+NHANES
+Hyattsville
+NCHS
+QUDT
+Subform
+binarized
+binnable
+
+# Verbs
+binarize
+genid
+umap
+concat
+onehot
+unhot
+groupby
+ungroup
+unorder
+rollup
+dedupe
+cume_dist
+
+# Args & Functions
+stdevp
+nand
+xnor
+nunique
+unapply
+unlisten
+toposort
+castable
+stdev
+
+# Corporate Terms
+MSRC
+msrc
+Dayenne
+Souza
+Carvajal
+Worthen
+Blanco
+
+# Test
+derp
+Hola
+ABCDEFGHIJKLMPQRSTUVWXYZ
+ZNGA
diff --git a/docs/README.md b/docs/README.md
@@ -1,4 +1,4 @@
-The core idea with these components is largely analagous to the object-oriented chain-of-responsibility pattern. We construct a workflow, which is a series of table transformation steps (e.g., middleware). We supply a table store to read and write to (e.g., context). After workflow execution is complete, we retrieve one or more output tables from the context.
+The core idea with these components is largely analogous to the object-oriented chain-of-responsibility pattern. We construct a workflow, which is a series of table transformation steps (e.g., middleware). We supply a table store to read and write to (e.g., context). After workflow execution is complete, we retrieve one or more output tables from the context.
 
 The fundamental unit of work in the system is a **verb**. Verbs represent primitive operations that return a table. Most verbs require an input table to transform.
 

diff --git a/docs/datatypes.md b/docs/datatypes.md
@@ -3,9 +3,9 @@ Data types present a number of thorny edge cases when dealing with different lan
 
 ## Common tricky use cases:
 - Text-based data files may contain strings that represent primitive values. Parsing these files should respect the data file's intent even if it overrides default language behavior. The most common example of this is probably boolean data columns with the values "true" and "false". JavaScript will naturally parse any non-empty string as `true`, so "false" -> `true`. A similar situation has been observed with "null".
-- Dates can be represented in a wide variey of formats, and parsing/guessing implementations differ by platform and library.
+- Dates can be represented in a wide variety of formats, and parsing/guessing implementations differ by platform and library.
   - `new Date()` in JavaScript is problematic, and may also conflict with pandas' [date guessing](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html?highlight=date#date-handling).
-- Autotomatic type discovery for columns is performed by both Arquero and pandas, but may have different results.
+- Automatic type discovery for columns is performed by both Arquero and pandas, but may have different results.
 - Some verbs can only be performed on certain data types, and other verbs can work with different data types but have different operators available. For example:
   - [bin](./verbs/bin.md) requires numeric input types.
   - [filter](./verbs/filter.md) requires different comparison operators depending on type (e.g., string 'contains' versus numeric 'less than').
@@ -19,8 +19,8 @@ The following rules will be observed across implementations to ensure consistent
 - Pandas' [missing data logic](https://pandas.pydata.org/pandas-docs/stable/user_guide/missing_data.html#missing-data) will be used for computations and boolean evaluations.
   - In general, this means null values are carried forward and may result in null outputs.
   - For boolean comparisons, null propagation is situation-dependent (see [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics)). For example, if any operand in an OR comparison is `true`, the evaluation can return `true` even with nulls present.
-- Coercing unparseable strings to dates will result in an `Invalid Date` (JavaScript) or `NaT` (pandas.to_datetime with errors='coerce').
-- Coercing unparseable strings to numbers will result in `NaN` (pandas.to_numeric with errors='coerce').
+- Coercing unparsable strings to dates will result in an `Invalid Date` (JavaScript) or `NaT` (pandas.to_datetime with errors='coerce').
+- Coercing unparsable strings to numbers will result in `NaN` (pandas.to_numeric with errors='coerce').
 - When reading text files, the pandas default strings for [missing values](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#na-values) and [booleans](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#boolean-values) will be used.
 - [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) will be used for standard date formatting. Other date formats will not be auto-guessed.
   - When providing a custom parse or format pattern, we follow Python and use the [1989 C standard tokens](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior). [d3-time-format](https://github.com/d3/d3-time-format) supports this standard for JavaScript.

diff --git a/docs/resources/tablebundle/index.md b/docs/resources/tablebundle/index.md
@@ -2,7 +2,7 @@
 
 The **table bundle** brings together all of the component resources to fully materialize a table for analytic use. This can include a base [data table](../datatable/index.md) (e.g., CSV or JSON data file), a [codebook](../codebook/index.md) that defines the schema, and a [workflow](../workflow/index.md) defining transformations to apply.
 
-You can contruct table bundles in a variety of ways, including symlinking from one to another to create derived collections that dynamically update as child dependencies are modified.
+You can construct table bundles in a variety of ways, including symlinking from one to another to create derived collections that dynamically update as child dependencies are modified.
 
 ## Table view
 

diff --git a/docs/verbs/bin.md b/docs/verbs/bin.md
@@ -7,7 +7,7 @@ _The input column for a binning operation must be a numeric data type._
 Multiple binning strategies are supported. Please see the [numpy documentation](https://numpy.org/doc/stable/reference/generated/numpy.histogram_bin_edges.html) for detailed descriptions of the algorithms.
 
 - Auto: uses automatic bin boundary guessing to create optimal default bins.
-- Fd: Freedman diaconis estimator, resilient to outliers.
+- Fd: Freedman-Diaconis estimator, resilient to outliers.
 - Doane: Better for non-normal datasets.
 - Scott: Less robust but takes data variability into account.
 - Stone: Based on leave-one-out cross-validation.

diff --git a/docs/verbs/rollup.md b/docs/verbs/rollup.md
@@ -18,7 +18,7 @@ Performs aggregation operations on table columns. Normally the table should be [
 - `median`: finds the median of the values
 - `stdev`: computes the standard deviation of the values
 - `stdevp`: computes the population standard deviation of the values
-- `variance`: computes the variane of the values
+- `variance`: computes the variance of the values
 - `array_agg`: collects all of the values in an array
 - `array_agg_distinct`: collects all of the unique values in an array
 

diff --git a/javascript/app-framework/README.md b/javascript/app-framework/README.md
@@ -1,6 +1,6 @@
 # app-framework
 
-The DataShaper app-framework package provides infrastructure for creating new applications that include core DataShaper functionality by default, as well as extensibility to build your own interfaces that are managedby the system consistently.
+The DataShaper app-framework package provides infrastructure for creating new applications that include core DataShaper functionality by default, as well as extensibility to build your own interfaces that are managed by the system consistently.
 
 ## Resources
 

diff --git a/javascript/app-framework/docs/markdown/app-framework.datashaperappprops.classname.md b/javascript/app-framework/docs/markdown/app-framework.datashaperappprops.classname.md
diff --git a/javascript/app-framework/docs/markdown/app-framework.datashaperappprops.md b/javascript/app-framework/docs/markdown/app-framework.datashaperappprops.md
diff --git a/javascript/app-framework/docs/markdown/app-framework.md b/javascript/app-framework/docs/markdown/app-framework.md
diff --git a/javascript/app-framework/docs/markdown/app-framework.usetablebundleoutput.md b/javascript/app-framework/docs/markdown/app-framework.usetablebundleoutput.md
diff --git a/javascript/app-framework/docs/report/app-framework.api.json b/javascript/app-framework/docs/report/app-framework.api.json
diff --git a/javascript/app-framework/src/components/DataTableConfig/Parser/Delimiter.tsx b/javascript/app-framework/src/components/DataTableConfig/Parser/Delimiter.tsx
@@ -18,7 +18,7 @@ export const Delimiter: React.FC<{
 		useBoolean(false)
 	const [value, setValue] = useState(isOther ? selected : '')
 
-	const onDelimeterChange = useCallback(
+	const onDelimiterChange = useCallback(
 		(option?: IChoiceGroupOption) => {
 			if (option?.key === 'Other') {
 				customDelimiter()
@@ -38,7 +38,7 @@ export const Delimiter: React.FC<{
 		},
 	)
 
-	const onChangeCustomDelimeter = useCallback(
+	const onChangeCustomDelimiter = useCallback(
 		(
 			_: React.FormEvent<HTMLInputElement | HTMLTextAreaElement>,
 			newValue?: string,
@@ -58,15 +58,15 @@ export const Delimiter: React.FC<{
 				label='Delimiter'
 				defaultSelectedKey={selected}
 				options={delimiterOptions}
-				onChange={(_, option) => onDelimeterChange(option)}
+				onChange={(_, option) => onDelimiterChange(option)}
 			/>
 			<TextField
 				autoComplete='off'
 				title='custom delimiter'
 				name='customDelimiter'
 				disabled={!isOther}
 				value={value}
-				onChange={onChangeCustomDelimeter}
+				onChange={onChangeCustomDelimiter}
 			/>
 		</DelimiterContainer>
 	)

diff --git a/javascript/app-framework/src/components/DataTableConfig/Parser/Parser.tsx b/javascript/app-framework/src/components/DataTableConfig/Parser/Parser.tsx
@@ -49,8 +49,8 @@ export const Parser: React.FC<ParserProps> = memo(function Parser({ parser }) {
 			<FlexContainer>
 				<Delimiter
 					selected={delimiter}
-					onChange={(delim: string) => {
-						parser.delimiter = delim
+					onChange={(delimiter: string) => {
+						parser.delimiter = delimiter
 					}}
 				/>
 			</FlexContainer>

diff --git a/javascript/app-framework/src/components/app/DataShaperApp/DataShaperApp.types.ts b/javascript/app-framework/src/components/app/DataShaperApp/DataShaperApp.types.ts
@@ -9,7 +9,7 @@ import type { FileDefinition } from '../ResourcesPane/index.js'
 
 export interface DataShaperAppProps<T = unknown> {
 	/**
-	 * CSS Classname
+	 * CSS class name
 	 */
 	className?: string
 

diff --git a/javascript/app-framework/src/components/app/ResourcesPane/FileTreeCommands.hooks.ts b/javascript/app-framework/src/components/app/ResourcesPane/FileTreeCommands.hooks.ts
@@ -22,7 +22,7 @@ import type { FileDefinition } from './ResourcesPane.types.js'
 import { useLoadDataPackage } from '../../../hooks/useLoadDataPackage.js'
 
 /**
- * Gets the file-managament commandbar items
+ * Gets the file-management commandbar items
  *
  * @param examples - The provided examples
  * @param expanded - Whether the pane is expanded

diff --git a/javascript/app-framework/src/components/app/ResourcesPane/ResourceTree.hooks.tsx b/javascript/app-framework/src/components/app/ResourcesPane/ResourceTree.hooks.tsx
@@ -65,7 +65,7 @@ function makeTreeItem(
 	// (a) rendered if empty, so that options can be selected, and
 	// (b) rendered _in place of_ the child resource, so we don't have redundant child entries.
 	// so:
-	// 1: iterate the field wells if present, creating a tree item for each. these should have no href, so are "unclickable"
+	// 1: iterate the field wells if present, creating a tree item for each. these should have no href, so are "un-clickable"
 	// 1.1: if any well has a selected key, then we don't need to render the child resource, save it for later
 	// 2: iterate the child resources and create an item for each one that isn't already marked from the wells
 	const handled = new Set<string>()

diff --git a/javascript/app-framework/src/components/editors/TableBundleEditor/TableBundleEditor.hooks.ts b/javascript/app-framework/src/components/editors/TableBundleEditor/TableBundleEditor.hooks.ts
@@ -92,7 +92,7 @@ export function useOnDeleteStep(
  * Get a function to call when a step is created
  * @param save - The save function to call when the step is created
  * @param selectOutput - A function to select the output after the step is created
- * @param dismissModal - The function used to dismill the modal
+ * @param dismissModal - The function used to dismiss the modal
  * @returns
  */
 export function useOnCreateStep(